]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/LegacyPhylo.hs
[WIP] backup during the vacations
[gargantext.git] / src / Gargantext / Core / Viz / LegacyPhylo.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveAnyClass #-}
26 {-# LANGUAGE TemplateHaskell #-}
27
28 module Gargantext.Core.Viz.LegacyPhylo where
29
30 import Control.DeepSeq
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON,defaultOptions)
33 import Data.Map (Map)
34 import Data.Set (Set)
35 import Data.Swagger
36 import Data.Text (Text)
37 import Data.Vector (Vector)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Prelude
41 import Gargantext.Core.Text.Context (TermList)
42
43 --------------------
44 -- | PhyloParam | --
45 --------------------
46
47
48 -- | Global parameters of a Phylo
49 data PhyloParam =
50 PhyloParam { _phyloParam_version :: !Text -- Double ?
51 , _phyloParam_software :: !Software
52 , _phyloParam_query :: !PhyloQueryBuild
53 } deriving (Generic, Show, Eq)
54
55
56 -- | Software parameters
57 data Software =
58 Software { _software_name :: !Text
59 , _software_version :: !Text
60 } deriving (Generic, Show, Eq)
61
62
63 ---------------
64 -- | Phylo | --
65 ---------------
66
67
68 -- | Phylo datatype of a phylomemy
69 -- Duration : time Segment of the whole Phylo
70 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
71 -- Periods : list of all the periods of a Phylo
72 data Phylo =
73 Phylo { _phylo_duration :: !(Start, End)
74 , _phylo_foundations :: !PhyloFoundations
75 , _phylo_periods :: [PhyloPeriod]
76 , _phylo_docsByYears :: !(Map Date Double)
77 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
78 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
79 , _phylo_param :: !PhyloParam
80 }
81 deriving (Generic, Show, Eq)
82
83
84 -- | The foundations of a phylomemy created from a given TermList
85 data PhyloFoundations =
86 PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
87 , _phylo_foundationsTermsList :: !TermList
88 } deriving (Generic, Show, Eq)
89
90
91 -- | Date : a simple Integer
92 type Date = Int
93
94 -- | UTCTime in seconds since UNIX epoch
95 -- type Start = POSIXTime
96 -- type End = POSIXTime
97 type Start = Date
98 type End = Date
99
100
101 ---------------------
102 -- | PhyloPeriod | --
103 ---------------------
104
105
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
109 data PhyloPeriod =
110 PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
111 , _phylo_periodLevels :: ![PhyloLevel]
112 }
113 deriving (Generic, Show, Eq)
114
115
116 --------------------
117 -- | PhyloLevel | --
118 --------------------
119
120
121 -- | PhyloLevel : levels of phylomemy on level axis
122 -- Levels description:
123 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
124 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
125 -- Level 1: First level of clustering
126 -- Level N: Nth level of clustering
127 data PhyloLevel =
128 PhyloLevel { _phylo_levelId :: !PhyloLevelId
129 , _phylo_levelGroups :: ![PhyloGroup]
130 }
131 deriving (Generic, Show, Eq)
132
133
134 --------------------
135 -- | PhyloGroup | --
136 --------------------
137
138
139 -- | PhyloGroup : group of ngrams at each level and step
140 -- Label : maybe has a label as text
141 -- Ngrams: set of terms that build the group
142 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
143 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
144 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
145 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
146 data PhyloGroup =
147 PhyloGroup { _phylo_groupId :: !PhyloGroupId
148 , _phylo_groupLabel :: !Text
149 , _phylo_groupNgrams :: ![Int]
150 , _phylo_groupNgramsMeta :: !(Map Text [Double])
151 , _phylo_groupMeta :: !(Map Text Double)
152 , _phylo_groupBranchId :: !(Maybe PhyloBranchId)
153 , _phylo_groupCooc :: !(Map (Int,Int) Double)
154
155 , _phylo_groupPeriodParents :: ![Pointer]
156 , _phylo_groupPeriodChilds :: ![Pointer]
157
158 , _phylo_groupLevelParents :: ![Pointer]
159 , _phylo_groupLevelChilds :: ![Pointer]
160 }
161 deriving (Generic, NFData, Show, Eq, Ord)
162
163 -- instance NFData PhyloGroup
164
165
166 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
167 type Level = Int
168 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
169 type Index = Int
170
171
172 type PhyloPeriodId = (Start, End)
173 type PhyloLevelId = (PhyloPeriodId, Level)
174 type PhyloGroupId = (PhyloLevelId, Index)
175 type PhyloBranchId = (Level, Index)
176
177
178 -- | Weight : A generic mesure that can be associated with an Id
179 type Weight = Double
180 -- | Pointer : A weighted linked with a given PhyloGroup
181 type Pointer = (PhyloGroupId, Weight)
182 -- | Ngrams : a contiguous sequence of n terms
183 type Ngrams = Text
184
185
186 --------------------
187 -- | Aggregates | --
188 --------------------
189
190
191 -- | Document : a piece of Text linked to a Date
192 data Document = Document
193 { date :: !Date
194 , text :: ![Ngrams]
195 } deriving (Show,Generic,NFData)
196
197 -- | Clique : Set of ngrams cooccurring in the same Document
198 type Clique = Set Ngrams
199 -- | Support : Number of Documents where a Clique occurs
200 type Support = Int
201 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
202 data PhyloFis = PhyloFis
203 { _phyloFis_clique :: !Clique
204 , _phyloFis_support :: !Support
205 , _phyloFis_period :: !(Date,Date)
206 } deriving (Generic,NFData,Show,Eq)
207
208 -- | A list of clustered PhyloGroup
209 type PhyloCluster = [PhyloGroup]
210
211
212 -- | A PhyloGroup in a Graph
213 type GroupNode = PhyloGroup
214 -- | A weighted links between two PhyloGroups in a Graph
215 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
216 -- | The association as a Graph between a list of Nodes and a list of Edges
217 type GroupGraph = ([GroupNode],[GroupEdge])
218
219
220 ---------------
221 -- | Error | --
222 ---------------
223
224
225 data PhyloError = LevelDoesNotExist
226 | LevelUnassigned
227 deriving (Show)
228
229
230 -----------------
231 -- | Cluster | --
232 -----------------
233
234
235 -- | Cluster constructors
236 data Cluster = Fis FisParams
237 | RelatedComponents RCParams
238 | Louvain LouvainParams
239 deriving (Generic, Show, Eq, Read)
240
241 -- | Parameters for Fis clustering
242 data FisParams = FisParams
243 { _fis_keepMinorFis :: !Bool
244 , _fis_minSupport :: !Support
245 , _fis_minSize :: !Int
246 } deriving (Generic, Show, Eq, Read)
247
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
251
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
255
256
257 -------------------
258 -- | Proximity | --
259 -------------------
260
261
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | WeightedLogSim WLJParams
265 | Hamming HammingParams
266 | Filiation
267 deriving (Generic, Show, Eq, Read)
268
269 -- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
270 data WLJParams = WLJParams
271 { _wlj_threshold :: !Double
272 , _wlj_sensibility :: !Double
273 } deriving (Generic, Show, Eq, Read)
274
275 -- | Parameters for Hamming proximity
276 data HammingParams = HammingParams
277 { _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
278
279
280 ----------------
281 -- | Filter | --
282 ----------------
283
284
285 -- | Filter constructors
286 data Filter = LonelyBranch LBParams
287 | SizeBranch SBParams
288 deriving (Generic, Show, Eq)
289
290 -- | Parameters for LonelyBranch filter
291 data LBParams = LBParams
292 { _lb_periodsInf :: !Int
293 , _lb_periodsSup :: !Int
294 , _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
295
296 -- | Parameters for SizeBranch filter
297 data SBParams = SBParams
298 { _sb_minSize :: !Int } deriving (Generic, Show, Eq)
299
300
301 ----------------
302 -- | Metric | --
303 ----------------
304
305
306 -- | Metric constructors
307 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
308
309
310 ----------------
311 -- | Tagger | --
312 ----------------
313
314
315 -- | Tagger constructors
316 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
317 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
318
319
320 --------------
321 -- | Sort | --
322 --------------
323
324
325 -- | Sort constructors
326 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
327 data Order = Asc | Desc deriving (Generic, Show, Read)
328
329
330 --------------------
331 -- | PhyloQuery | --
332 --------------------
333
334
335 -- | A Phyloquery describes a phylomemic reconstruction
336 data PhyloQueryBuild = PhyloQueryBuild
337 { _q_phyloTitle :: !Text
338 , _q_phyloDesc :: !Text
339
340 -- Grain and Steps for the PhyloPeriods
341 , _q_periodGrain :: !Int
342 , _q_periodSteps :: !Int
343
344 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
345 , _q_contextualUnit :: !Cluster
346 , _q_contextualUnitMetrics :: ![Metric]
347 , _q_contextualUnitFilters :: ![Filter]
348
349 -- Inter-temporal matching method of the Phylo
350 , _q_interTemporalMatching :: !Proximity
351 , _q_interTemporalMatchingFrame :: !Int
352 , _q_interTemporalMatchingFrameTh :: !Double
353
354 , _q_reBranchThr :: !Double
355 , _q_reBranchNth :: !Int
356
357 -- Last level of reconstruction
358 , _q_nthLevel :: !Level
359 -- Clustering method used from level 1 to nthLevel
360 , _q_nthCluster :: !Cluster
361 } deriving (Generic, Show, Eq)
362
363 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
364 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
365 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
366
367 -------------------
368 -- | PhyloView | --
369 -------------------
370
371
372 -- | A PhyloView is the output type of a Phylo
373 data PhyloView = PhyloView
374 { _pv_param :: !PhyloParam
375 , _pv_title :: !Text
376 , _pv_description :: !Text
377 , _pv_filiation :: !Filiation
378 , _pv_level :: !Level
379 , _pv_periods :: ![PhyloPeriodId]
380 , _pv_metrics :: !(Map Text [Double])
381 , _pv_branches :: ![PhyloBranch]
382 , _pv_nodes :: ![PhyloNode]
383 , _pv_edges :: ![PhyloEdge]
384 } deriving (Generic, Show)
385
386 -- | A phyloview is made of PhyloBranches, edges and nodes
387 data PhyloBranch = PhyloBranch
388 { _pb_id :: !PhyloBranchId
389 , _pb_peak :: !Text
390 , _pb_metrics :: !(Map Text [Double])
391 } deriving (Generic, Show)
392
393 data PhyloEdge = PhyloEdge
394 { _pe_source :: !PhyloGroupId
395 , _pe_target :: !PhyloGroupId
396 , _pe_type :: !EdgeType
397 , _pe_weight :: !Weight
398 } deriving (Generic, Show)
399
400 data PhyloNode = PhyloNode
401 { _pn_id :: !PhyloGroupId
402 , _pn_bid :: !(Maybe PhyloBranchId)
403 , _pn_label :: !Text
404 , _pn_idx :: ![Int]
405 , _pn_ngrams :: !(Maybe [Ngrams])
406 , _pn_metrics :: !(Map Text [Double])
407 , _pn_cooc :: !(Map (Int,Int) Double)
408 , _pn_parents :: !(Maybe [PhyloGroupId])
409 , _pn_childs :: ![PhyloNode]
410 } deriving (Generic, Show)
411
412 ------------------------
413 -- | PhyloQueryView | --
414 ------------------------
415
416
417 data ExportMode = Json | Dot | Svg
418 deriving (Generic, Show, Read)
419 data DisplayMode = Flat | Nested
420 deriving (Generic, Show, Read)
421
422 -- | A PhyloQueryView describes a Phylo as an output view
423 data PhyloQueryView = PhyloQueryView
424 { _qv_lvl :: !Level
425
426 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
427 , _qv_filiation :: !Filiation
428
429 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
430 , _qv_levelChilds :: !Bool
431 , _qv_levelChildsDepth :: !Level
432
433 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
434 -- Firstly the metrics, then the filters and the taggers
435 , _qv_metrics :: ![Metric]
436 , _qv_filters :: ![Filter]
437 , _qv_taggers :: ![Tagger]
438
439 -- An asc or desc sort to apply to the PhyloGraph
440 , _qv_sort :: !(Maybe (Sort,Order))
441
442 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
443 , _qv_export :: !ExportMode
444 , _qv_display :: !DisplayMode
445 , _qv_verbose :: !Bool
446 }
447
448
449 ----------------
450 -- | Lenses | --
451 ----------------
452
453
454 makeLenses ''PhyloParam
455 makeLenses ''Software
456 --
457 makeLenses ''Phylo
458 makeLenses ''PhyloFoundations
459 makeLenses ''PhyloGroup
460 makeLenses ''PhyloLevel
461 makeLenses ''PhyloPeriod
462 makeLenses ''PhyloFis
463 --
464 makeLenses ''Proximity
465 makeLenses ''Cluster
466 makeLenses ''Filter
467 --
468 makeLenses ''PhyloQueryBuild
469 makeLenses ''PhyloQueryView
470 --
471 makeLenses ''PhyloView
472 makeLenses ''PhyloBranch
473 makeLenses ''PhyloNode
474 makeLenses ''PhyloEdge
475
476
477 ------------------------
478 -- | JSON instances | --
479 ------------------------
480
481
482 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
483 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
484 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
485 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
486 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
487 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
488 --
489 $(deriveJSON (unPrefix "_software_" ) ''Software )
490 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
491 --
492 $(deriveJSON defaultOptions ''Filter )
493 $(deriveJSON defaultOptions ''Metric )
494 $(deriveJSON defaultOptions ''Cluster )
495 $(deriveJSON defaultOptions ''Proximity )
496 --
497 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
498 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
499 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
500 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
501 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
502 --
503 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
504 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
505 --
506 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
507 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
508 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
509 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
510 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
511
512 $(deriveJSON defaultOptions ''Filiation )
513 $(deriveJSON defaultOptions ''EdgeType )
514
515 ---------------------------
516 -- | Swagger instances | --
517 ---------------------------
518
519 instance ToSchema Phylo where
520 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
521 instance ToSchema PhyloFoundations where
522 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
523 instance ToSchema PhyloPeriod where
524 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
525 instance ToSchema PhyloLevel where
526 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
527 instance ToSchema PhyloGroup where
528 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
529 instance ToSchema PhyloFis where
530 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
531 instance ToSchema Software where
532 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
533 instance ToSchema PhyloParam where
534 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
535 instance ToSchema Filter
536 instance ToSchema Metric
537 instance ToSchema Cluster
538 instance ToSchema Proximity where
539 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
540 instance ToSchema FisParams where
541 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
542 instance ToSchema HammingParams where
543 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
544 instance ToSchema LouvainParams where
545 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
546 instance ToSchema RCParams where
547 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
548 instance ToSchema WLJParams where
549 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
550 instance ToSchema LBParams where
551 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
552 instance ToSchema SBParams where
553 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
554 instance ToSchema PhyloQueryBuild where
555 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
556 instance ToSchema PhyloView where
557 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
558 instance ToSchema PhyloBranch where
559 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
560 instance ToSchema PhyloEdge where
561 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
562 instance ToSchema PhyloNode where
563 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
564 instance ToSchema Filiation
565 instance ToSchema EdgeType
566
567 ----------------------------
568 -- | TODO XML instances | --
569 ----------------------------
570