]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo.hs
Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Core / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveAnyClass #-}
26 {-# LANGUAGE TemplateHaskell #-}
27
28 module Gargantext.Core.Viz.Phylo where
29
30 import Control.DeepSeq
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON,defaultOptions)
33 import Data.Map (Map)
34 import Data.Set (Set)
35 import Data.Swagger
36 import Data.Text (Text)
37 import Data.Vector (Vector)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Prelude
41 import Gargantext.Core.Text.Context (TermList)
42
43 --------------------
44 -- | PhyloParam | --
45 --------------------
46
47
48 -- | Global parameters of a Phylo
49 data PhyloParam =
50 PhyloParam { _phyloParam_version :: !Text -- Double ?
51 , _phyloParam_software :: !Software
52 , _phyloParam_query :: !PhyloQueryBuild
53 } deriving (Generic, Show, Eq)
54
55
56 -- | Software parameters
57 data Software =
58 Software { _software_name :: !Text
59 , _software_version :: !Text
60 } deriving (Generic, Show, Eq)
61
62
63 ---------------
64 -- | Phylo | --
65 ---------------
66
67
68 -- | Phylo datatype of a phylomemy
69 -- Duration : time Segment of the whole Phylo
70 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
71 -- Periods : list of all the periods of a Phylo
72 data Phylo =
73 Phylo { _phylo_duration :: !(Start, End)
74 , _phylo_foundations :: !PhyloFoundations
75 , _phylo_periods :: [PhyloPeriod]
76 , _phylo_docsByYears :: !(Map Date Double)
77 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
78 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
79 , _phylo_param :: !PhyloParam
80 }
81 deriving (Generic, Show, Eq)
82
83
84 -- | The foundations of a phylomemy created from a given TermList
85 data PhyloFoundations =
86 PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
87 , _phylo_foundationsTermsList :: !TermList
88 } deriving (Generic, Show, Eq)
89
90
91 -- | Date : a simple Integer
92 type Date = Int
93
94 -- | UTCTime in seconds since UNIX epoch
95 -- type Start = POSIXTime
96 -- type End = POSIXTime
97 type Start = Date
98 type End = Date
99
100
101 ---------------------
102 -- | PhyloPeriod | --
103 ---------------------
104
105
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
109 data PhyloPeriod =
110 PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
111 , _phylo_periodLevels :: ![PhyloLevel]
112 }
113 deriving (Generic, Show, Eq)
114
115
116 --------------------
117 -- | PhyloLevel | --
118 --------------------
119
120
121 -- | PhyloLevel : levels of phylomemy on level axis
122 -- Levels description:
123 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
124 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
125 -- Level 1: First level of clustering
126 -- Level N: Nth level of clustering
127 data PhyloLevel =
128 PhyloLevel { _phylo_levelId :: !PhyloLevelId
129 , _phylo_levelGroups :: ![PhyloGroup]
130 }
131 deriving (Generic, Show, Eq)
132
133
134 --------------------
135 -- | PhyloGroup | --
136 --------------------
137
138
139 -- | PhyloGroup : group of ngrams at each level and step
140 -- Label : maybe has a label as text
141 -- Ngrams: set of terms that build the group
142 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
143 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
144 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
145 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
146 data PhyloGroup =
147 PhyloGroup { _phylo_groupId :: !PhyloGroupId
148 , _phylo_groupLabel :: !Text
149 , _phylo_groupNgrams :: ![Int]
150 , _phylo_groupNgramsMeta :: !(Map Text [Double])
151 , _phylo_groupMeta :: !(Map Text Double)
152 , _phylo_groupBranchId :: !(Maybe PhyloBranchId)
153 , _phylo_groupCooc :: !(Map (Int,Int) Double)
154
155 , _phylo_groupPeriodParents :: ![Pointer]
156 , _phylo_groupPeriodChilds :: ![Pointer]
157
158 , _phylo_groupLevelParents :: ![Pointer]
159 , _phylo_groupLevelChilds :: ![Pointer]
160 }
161 deriving (Generic, NFData, Show, Eq, Ord)
162
163 -- instance NFData PhyloGroup
164
165
166 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
167 type Level = Int
168 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
169 type Index = Int
170
171
172 type PhyloPeriodId = (Start, End)
173 type PhyloLevelId = (PhyloPeriodId, Level)
174 type PhyloGroupId = (PhyloLevelId, Index)
175 type PhyloBranchId = (Level, Index)
176
177
178 -- | Weight : A generic mesure that can be associated with an Id
179 type Weight = Double
180 -- | Pointer : A weighted linked with a given PhyloGroup
181 type Pointer = (PhyloGroupId, Weight)
182 -- | Ngrams : a contiguous sequence of n terms
183 type Ngrams = Text
184
185
186 --------------------
187 -- | Aggregates | --
188 --------------------
189
190
191 -- | Document : a piece of Text linked to a Date
192 data Document = Document
193 { date :: !Date
194 , text :: ![Ngrams]
195 } deriving (Show,Generic,NFData)
196
197 -- | Clique : Set of ngrams cooccurring in the same Document
198 type Clique = Set Ngrams
199 -- | Support : Number of Documents where a Clique occurs
200 type Support = Int
201 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
202 data PhyloFis = PhyloFis
203 { _phyloFis_clique :: !Clique
204 , _phyloFis_support :: !Support
205 , _phyloFis_period :: !(Date,Date)
206 } deriving (Generic,NFData,Show,Eq)
207
208 -- | A list of clustered PhyloGroup
209 type PhyloCluster = [PhyloGroup]
210
211
212 -- | A PhyloGroup in a Graph
213 type GroupNode = PhyloGroup
214 -- | A weighted links between two PhyloGroups in a Graph
215 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
216 -- | The association as a Graph between a list of Nodes and a list of Edges
217 type GroupGraph = ([GroupNode],[GroupEdge])
218
219
220 ---------------
221 -- | Error | --
222 ---------------
223
224
225 data PhyloError = LevelDoesNotExist
226 | LevelUnassigned
227 deriving (Show)
228
229
230 -----------------
231 -- | Cluster | --
232 -----------------
233
234
235 -- | Cluster constructors
236 data Cluster = Fis FisParams
237 | RelatedComponents RCParams
238 | Louvain LouvainParams
239 deriving (Generic, Show, Eq, Read)
240
241 -- | Parameters for Fis clustering
242 data FisParams = FisParams
243 { _fis_keepMinorFis :: !Bool
244 , _fis_minSupport :: !Support
245 , _fis_minSize :: !Int
246 } deriving (Generic, Show, Eq, Read)
247
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
251
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
255
256
257 -------------------
258 -- | Proximity | --
259 -------------------
260
261
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
265 | Filiation
266 deriving (Generic, Show, Eq, Read)
267
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: !Double
271 , _wlj_sensibility :: !Double
272 } deriving (Generic, Show, Eq, Read)
273
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
277
278
279 ----------------
280 -- | Filter | --
281 ----------------
282
283
284 -- | Filter constructors
285 data Filter = LonelyBranch LBParams
286 | SizeBranch SBParams
287 deriving (Generic, Show, Eq)
288
289 -- | Parameters for LonelyBranch filter
290 data LBParams = LBParams
291 { _lb_periodsInf :: !Int
292 , _lb_periodsSup :: !Int
293 , _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
294
295 -- | Parameters for SizeBranch filter
296 data SBParams = SBParams
297 { _sb_minSize :: !Int } deriving (Generic, Show, Eq)
298
299
300 ----------------
301 -- | Metric | --
302 ----------------
303
304
305 -- | Metric constructors
306 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
307
308
309 ----------------
310 -- | Tagger | --
311 ----------------
312
313
314 -- | Tagger constructors
315 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
316 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
317
318
319 --------------
320 -- | Sort | --
321 --------------
322
323
324 -- | Sort constructors
325 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
326 data Order = Asc | Desc deriving (Generic, Show, Read)
327
328
329 --------------------
330 -- | PhyloQuery | --
331 --------------------
332
333
334 -- | A Phyloquery describes a phylomemic reconstruction
335 data PhyloQueryBuild = PhyloQueryBuild
336 { _q_phyloTitle :: !Text
337 , _q_phyloDesc :: !Text
338
339 -- Grain and Steps for the PhyloPeriods
340 , _q_periodGrain :: !Int
341 , _q_periodSteps :: !Int
342
343 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
344 , _q_contextualUnit :: !Cluster
345 , _q_contextualUnitMetrics :: ![Metric]
346 , _q_contextualUnitFilters :: ![Filter]
347
348 -- Inter-temporal matching method of the Phylo
349 , _q_interTemporalMatching :: !Proximity
350 , _q_interTemporalMatchingFrame :: !Int
351 , _q_interTemporalMatchingFrameTh :: !Double
352
353 , _q_reBranchThr :: !Double
354 , _q_reBranchNth :: !Int
355
356 -- Last level of reconstruction
357 , _q_nthLevel :: !Level
358 -- Clustering method used from level 1 to nthLevel
359 , _q_nthCluster :: !Cluster
360 } deriving (Generic, Show, Eq)
361
362 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
363 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
364 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
365
366 -------------------
367 -- | PhyloView | --
368 -------------------
369
370
371 -- | A PhyloView is the output type of a Phylo
372 data PhyloView = PhyloView
373 { _pv_param :: !PhyloParam
374 , _pv_title :: !Text
375 , _pv_description :: !Text
376 , _pv_filiation :: !Filiation
377 , _pv_level :: !Level
378 , _pv_periods :: ![PhyloPeriodId]
379 , _pv_metrics :: !(Map Text [Double])
380 , _pv_branches :: ![PhyloBranch]
381 , _pv_nodes :: ![PhyloNode]
382 , _pv_edges :: ![PhyloEdge]
383 } deriving (Generic, Show)
384
385 -- | A phyloview is made of PhyloBranches, edges and nodes
386 data PhyloBranch = PhyloBranch
387 { _pb_id :: !PhyloBranchId
388 , _pb_peak :: !Text
389 , _pb_metrics :: !(Map Text [Double])
390 } deriving (Generic, Show)
391
392 data PhyloEdge = PhyloEdge
393 { _pe_source :: !PhyloGroupId
394 , _pe_target :: !PhyloGroupId
395 , _pe_type :: !EdgeType
396 , _pe_weight :: !Weight
397 } deriving (Generic, Show)
398
399 data PhyloNode = PhyloNode
400 { _pn_id :: !PhyloGroupId
401 , _pn_bid :: !(Maybe PhyloBranchId)
402 , _pn_label :: !Text
403 , _pn_idx :: ![Int]
404 , _pn_ngrams :: !(Maybe [Ngrams])
405 , _pn_metrics :: !(Map Text [Double])
406 , _pn_cooc :: !(Map (Int,Int) Double)
407 , _pn_parents :: !(Maybe [PhyloGroupId])
408 , _pn_childs :: ![PhyloNode]
409 } deriving (Generic, Show)
410
411 ------------------------
412 -- | PhyloQueryView | --
413 ------------------------
414
415
416 data ExportMode = Json | Dot | Svg
417 deriving (Generic, Show, Read)
418 data DisplayMode = Flat | Nested
419 deriving (Generic, Show, Read)
420
421 -- | A PhyloQueryView describes a Phylo as an output view
422 data PhyloQueryView = PhyloQueryView
423 { _qv_lvl :: !Level
424
425 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
426 , _qv_filiation :: !Filiation
427
428 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
429 , _qv_levelChilds :: !Bool
430 , _qv_levelChildsDepth :: !Level
431
432 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
433 -- Firstly the metrics, then the filters and the taggers
434 , _qv_metrics :: ![Metric]
435 , _qv_filters :: ![Filter]
436 , _qv_taggers :: ![Tagger]
437
438 -- An asc or desc sort to apply to the PhyloGraph
439 , _qv_sort :: !(Maybe (Sort,Order))
440
441 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
442 , _qv_export :: !ExportMode
443 , _qv_display :: !DisplayMode
444 , _qv_verbose :: !Bool
445 }
446
447
448 ----------------
449 -- | Lenses | --
450 ----------------
451
452
453 makeLenses ''PhyloParam
454 makeLenses ''Software
455 --
456 makeLenses ''Phylo
457 makeLenses ''PhyloFoundations
458 makeLenses ''PhyloGroup
459 makeLenses ''PhyloLevel
460 makeLenses ''PhyloPeriod
461 makeLenses ''PhyloFis
462 --
463 makeLenses ''Proximity
464 makeLenses ''Cluster
465 makeLenses ''Filter
466 --
467 makeLenses ''PhyloQueryBuild
468 makeLenses ''PhyloQueryView
469 --
470 makeLenses ''PhyloView
471 makeLenses ''PhyloBranch
472 makeLenses ''PhyloNode
473 makeLenses ''PhyloEdge
474
475
476 ------------------------
477 -- | JSON instances | --
478 ------------------------
479
480
481 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
482 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
483 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
484 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
485 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
486 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
487 --
488 $(deriveJSON (unPrefix "_software_" ) ''Software )
489 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
490 --
491 $(deriveJSON defaultOptions ''Filter )
492 $(deriveJSON defaultOptions ''Metric )
493 $(deriveJSON defaultOptions ''Cluster )
494 $(deriveJSON defaultOptions ''Proximity )
495 --
496 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
497 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
498 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
499 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
500 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
501 --
502 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
503 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
504 --
505 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
506 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
507 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
508 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
509 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
510
511 $(deriveJSON defaultOptions ''Filiation )
512 $(deriveJSON defaultOptions ''EdgeType )
513
514 ---------------------------
515 -- | Swagger instances | --
516 ---------------------------
517
518 instance ToSchema Phylo where
519 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
520 instance ToSchema PhyloFoundations where
521 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
522 instance ToSchema PhyloPeriod where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
524 instance ToSchema PhyloLevel where
525 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
526 instance ToSchema PhyloGroup where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
528 instance ToSchema PhyloFis where
529 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
530 instance ToSchema Software where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
532 instance ToSchema PhyloParam where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
534 instance ToSchema Filter
535 instance ToSchema Metric
536 instance ToSchema Cluster
537 instance ToSchema Proximity where
538 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
539 instance ToSchema FisParams where
540 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
541 instance ToSchema HammingParams where
542 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
543 instance ToSchema LouvainParams where
544 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
545 instance ToSchema RCParams where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
547 instance ToSchema WLJParams where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
549 instance ToSchema LBParams where
550 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
551 instance ToSchema SBParams where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
553 instance ToSchema PhyloQueryBuild where
554 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
555 instance ToSchema PhyloView where
556 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
557 instance ToSchema PhyloBranch where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
559 instance ToSchema PhyloEdge where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
561 instance ToSchema PhyloNode where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
563 instance ToSchema Filiation
564 instance ToSchema EdgeType
565
566 ----------------------------
567 -- | TODO XML instances | --
568 ----------------------------
569