]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Add the PhyloParam to the Phylo constructor
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45
46 --------------------
47 -- | PhyloParam | --
48 --------------------
49
50
51 -- | Global parameters of a Phylo
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQuery
56 } deriving (Generic, Show)
57
58
59 -- | Software parameters
60 data Software =
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show)
64
65
66 ---------------
67 -- | Phylo | --
68 ---------------
69
70
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
74 -- Periods : list of all the periods of a Phylo
75 data Phylo =
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 , _phylo_periods :: [PhyloPeriod]
79 , _phylo_param :: PhyloParam
80 }
81 deriving (Generic, Show)
82
83
84 -- | Date : a simple Integer
85 type Date = Int
86
87 -- | UTCTime in seconds since UNIX epoch
88 -- type Start = POSIXTime
89 -- type End = POSIXTime
90 type Start = Date
91 type End = Date
92
93
94 ---------------------
95 -- | PhyloPeriod | --
96 ---------------------
97
98
99 -- | PhyloStep : steps of phylomemy on temporal axis
100 -- Period: tuple (start date, end date) of the step of the phylomemy
101 -- Levels: levels of granularity
102 data PhyloPeriod =
103 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
104 , _phylo_periodLevels :: [PhyloLevel]
105 }
106 deriving (Generic, Show)
107
108
109 --------------------
110 -- | PhyloLevel | --
111 --------------------
112
113
114 -- | PhyloLevel : levels of phylomemy on level axis
115 -- Levels description:
116 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
117 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
118 -- Level 1: First level of clustering
119 -- Level N: Nth level of clustering
120 data PhyloLevel =
121 PhyloLevel { _phylo_levelId :: PhyloLevelId
122 , _phylo_levelGroups :: [PhyloGroup]
123 }
124 deriving (Generic, Show)
125
126
127 --------------------
128 -- | PhyloGroup | --
129 --------------------
130
131
132 -- | PhyloGroup : group of ngrams at each level and step
133 -- Label : maybe has a label as text
134 -- Ngrams: set of terms that build the group
135 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
136 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
137 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
138 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
139 data PhyloGroup =
140 PhyloGroup { _phylo_groupId :: PhyloGroupId
141 , _phylo_groupLabel :: Text
142 , _phylo_groupNgrams :: [Int]
143 , _phylo_groupMeta :: Map Text Double
144 , _phylo_groupCooc :: Map (Int, Int) Double
145 , _phylo_groupBranchId :: Maybe PhyloBranchId
146
147 , _phylo_groupPeriodParents :: [Pointer]
148 , _phylo_groupPeriodChilds :: [Pointer]
149
150 , _phylo_groupLevelParents :: [Pointer]
151 , _phylo_groupLevelChilds :: [Pointer]
152 }
153 deriving (Generic, Show, Eq, Ord)
154
155
156 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
157 type Level = Int
158 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
159 type Index = Int
160
161
162 type PhyloPeriodId = (Start, End)
163 type PhyloLevelId = (PhyloPeriodId, Level)
164 type PhyloGroupId = (PhyloLevelId, Index)
165 type PhyloBranchId = (Level, Index)
166
167
168 -- | Weight : A generic mesure that can be associated with an Id
169 type Weight = Double
170 -- | Pointer : A weighted linked with a given PhyloGroup
171 type Pointer = (PhyloGroupId, Weight)
172 -- | Ngrams : a contiguous sequence of n terms
173 type Ngrams = Text
174
175
176 --------------------
177 -- | Aggregates | --
178 --------------------
179
180
181 -- | Document : a piece of Text linked to a Date
182 data Document = Document
183 { date :: Date
184 , text :: Text
185 } deriving (Show)
186
187
188 -- | Clique : Set of ngrams cooccurring in the same Document
189 type Clique = Set Ngrams
190 -- | Support : Number of Documents where a Clique occurs
191 type Support = Int
192 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
193 type PhyloFis = (Clique,Support)
194
195
196 -- | A list of clustered PhyloGroup
197 type PhyloCluster = [PhyloGroup]
198
199
200 -- | A List of PhyloGroup in a Graph
201 type GroupNodes = [PhyloGroup]
202 -- | A List of weighted links between some PhyloGroups in a Graph
203 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
204 -- | The association as a Graph between a list of Nodes and a list of Edges
205 type GroupGraph = (GroupNodes,GroupEdges)
206
207
208 ---------------
209 -- | Error | --
210 ---------------
211
212
213 data PhyloError = LevelDoesNotExist
214 | LevelUnassigned
215 deriving (Show)
216
217
218 -----------------
219 -- | Cluster | --
220 -----------------
221
222
223 -- | Cluster constructors
224 data Cluster = Fis FisParams
225 | RelatedComponents RCParams
226 | Louvain LouvainParams
227 deriving (Show)
228
229 -- | Parameters for Fis clustering
230 data FisParams = FisParams
231 { _fis_filtered :: Bool
232 , _fis_keepMinorFis :: Bool
233 , _fis_minSupport :: Support
234 } deriving (Show)
235
236 -- | Parameters for RelatedComponents clustering
237 data RCParams = RCParams
238 { _rc_proximity :: Proximity } deriving (Show)
239
240 -- | Parameters for Louvain clustering
241 data LouvainParams = LouvainParams
242 { _louvain_proximity :: Proximity } deriving (Show)
243
244
245 -------------------
246 -- | Proximity | --
247 -------------------
248
249
250 -- | Proximity constructors
251 data Proximity = WeightedLogJaccard WLJParams
252 | Hamming HammingParams
253 | Filiation
254 deriving (Show)
255
256 -- | Parameters for WeightedLogJaccard proximity
257 data WLJParams = WLJParams
258 { _wlj_threshold :: Double
259 , _wlj_sensibility :: Double
260 } deriving (Show)
261
262 -- | Parameters for Hamming proximity
263 data HammingParams = HammingParams
264 { _hamming_threshold :: Double } deriving (Show)
265
266
267 ----------------
268 -- | Filter | --
269 ----------------
270
271
272 -- | Filter constructors
273 data Filter = LonelyBranch LBParams deriving (Show)
274
275 -- | Parameters for LonelyBranch filter
276 data LBParams = LBParams
277 { _lb_periodsInf :: Int
278 , _lb_periodsSup :: Int
279 , _lb_minNodes :: Int } deriving (Show)
280
281
282 ----------------
283 -- | Metric | --
284 ----------------
285
286
287 -- | Metric constructors
288 data Metric = BranchAge deriving (Show)
289
290
291 ----------------
292 -- | Tagger | --
293 ----------------
294
295
296 -- | Tagger constructors
297 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
298
299
300 --------------
301 -- | Sort | --
302 --------------
303
304
305 -- | Sort constructors
306 data Sort = ByBranchAge deriving (Show)
307 data Order = Asc | Desc deriving (Show)
308
309
310 --------------------
311 -- | PhyloQuery | --
312 --------------------
313
314
315 -- | A Phyloquery describes a phylomemic reconstruction
316 data PhyloQuery = PhyloQuery
317 { _q_phyloTitle :: Text
318 , _q_phyloDesc :: Text
319
320 -- Grain and Steps for the PhyloPeriods
321 , _q_periodGrain :: Int
322 , _q_periodSteps :: Int
323
324 -- Clustering method for making level 1 of the Phylo
325 , _q_cluster :: Cluster
326
327 -- Inter-temporal matching method of the Phylo
328 , _q_interTemporalMatching :: Proximity
329
330 -- Last level of reconstruction
331 , _q_nthLevel :: Level
332 -- Clustering method used from level 1 to nthLevel
333 , _q_nthCluster :: Cluster
334 } deriving (Show)
335
336 data Filiation = Ascendant | Descendant | Complete deriving (Show)
337 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
338
339
340 -------------------
341 -- | PhyloView | --
342 -------------------
343
344
345 -- | A PhyloView is the output type of a Phylo
346 data PhyloView = PhyloView
347 { _phylo_viewParam :: PhyloParam
348 , _phylo_viewTitle :: Text
349 , _phylo_viewDescription :: Text
350 , _phylo_viewFiliation :: Filiation
351 , _phylo_viewMeta :: Map Text Double
352 , _phylo_viewBranches :: [PhyloBranch]
353 , _phylo_viewNodes :: [PhyloNode]
354 , _phylo_viewEdges :: [PhyloEdge]
355 } deriving (Show)
356
357 -- | A phyloview is made of PhyloBranches, edges and nodes
358 data PhyloBranch = PhyloBranch
359 { _phylo_branchId :: PhyloBranchId
360 , _phylo_branchLabel :: Text
361 , _phylo_branchMeta :: Map Text Double
362 } deriving (Show)
363
364 data PhyloEdge = PhyloEdge
365 { _phylo_edgeSource :: PhyloGroupId
366 , _phylo_edgeTarget :: PhyloGroupId
367 , _phylo_edgeType :: EdgeType
368 , _phylo_edgeWeight :: Weight
369 } deriving (Show)
370
371 data PhyloNode = PhyloNode
372 { _phylo_nodeId :: PhyloGroupId
373 , _phylo_nodeBranchId :: Maybe PhyloBranchId
374 , _phylo_nodeLabel :: Text
375 , _phylo_nodeNgramsIdx :: [Int]
376 , _phylo_nodeNgrams :: Maybe [Ngrams]
377 , _phylo_nodeMeta :: Map Text Double
378 , _phylo_nodeParent :: Maybe PhyloGroupId
379 , _phylo_nodeChilds :: [PhyloNode]
380 } deriving (Show)
381
382
383 ------------------------
384 -- | PhyloQueryView | --
385 ------------------------
386
387
388 data DisplayMode = Flat | Nested
389
390 -- | A PhyloQueryView describes a Phylo as an output view
391 data PhyloQueryView = PhyloQueryView
392 { _qv_lvl :: Level
393
394 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
395 , _qv_filiation :: Filiation
396
397 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
398 , _qv_childs :: Bool
399 , _qv_childsDepth :: Level
400
401 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
402 -- Firstly the metrics, then the filters and the taggers
403 , _qv_metrics :: [Metric]
404 , _qv_filters :: [Filter]
405 , _qv_taggers :: [Tagger]
406
407 -- An asc or desc sort to apply to the PhyloGraph
408 , _qv_sort :: Maybe (Sort,Order)
409
410 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
411 , _qv_display :: DisplayMode
412 , _qv_verbose :: Bool
413 }
414
415
416 ----------------
417 -- | Lenses | --
418 ----------------
419
420
421 makeLenses ''PhyloParam
422 makeLenses ''Software
423 --
424 makeLenses ''Phylo
425 makeLenses ''PhyloGroup
426 makeLenses ''PhyloLevel
427 makeLenses ''PhyloPeriod
428 --
429 makeLenses ''Proximity
430 makeLenses ''Cluster
431 makeLenses ''Filter
432 --
433 makeLenses ''PhyloQuery
434 makeLenses ''PhyloQueryView
435 --
436 makeLenses ''PhyloView
437 makeLenses ''PhyloBranch
438 makeLenses ''PhyloNode
439 makeLenses ''PhyloEdge
440
441
442 ------------------------
443 -- | JSON instances | --
444 ------------------------
445
446
447 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
448 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
449 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
450 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
451 --
452 $(deriveJSON (unPrefix "_software_" ) ''Software )
453 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
454 --
455 $(deriveJSON defaultOptions ''Cluster )
456 $(deriveJSON defaultOptions ''Proximity )
457 --
458 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
459 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
460 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
461 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
462 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
463 --
464 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
465
466
467 ----------------------------
468 -- | TODO XML instances | --
469 ----------------------------
470