]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Merge branch 'dev-doc-table-optimization' of ssh://gitlab.iscpif.fr:20022/gargantext...
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveAnyClass #-}
26 {-# LANGUAGE TemplateHaskell #-}
27
28 module Gargantext.Viz.Phylo where
29
30 import Control.DeepSeq
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON,defaultOptions)
33 import Data.Map (Map)
34 import Data.Maybe (Maybe)
35 import Data.Set (Set)
36 import Data.Swagger
37 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import GHC.Generics (Generic)
40 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
41 import Gargantext.Prelude
42 import Gargantext.Text.Context (TermList)
43 import Prelude (Bounded)
44
45 --------------------
46 -- | PhyloParam | --
47 --------------------
48
49
50 -- | Global parameters of a Phylo
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_query :: PhyloQueryBuild
55 } deriving (Generic, Show, Eq)
56
57
58 -- | Software parameters
59 data Software =
60 Software { _software_name :: Text
61 , _software_version :: Text
62 } deriving (Generic, Show, Eq)
63
64
65 ---------------
66 -- | Phylo | --
67 ---------------
68
69
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
74 data Phylo =
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: PhyloFoundations
77 , _phylo_periods :: [PhyloPeriod]
78 , _phylo_docsByYears :: Map Date Double
79 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
80 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
81 , _phylo_param :: PhyloParam
82 }
83 deriving (Generic, Show, Eq)
84
85
86 -- | The foundations of a phylomemy created from a given TermList
87 data PhyloFoundations =
88 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
89 , _phylo_foundationsTermsList :: TermList
90 } deriving (Generic, Show, Eq)
91
92
93 -- | Date : a simple Integer
94 type Date = Int
95
96 -- | UTCTime in seconds since UNIX epoch
97 -- type Start = POSIXTime
98 -- type End = POSIXTime
99 type Start = Date
100 type End = Date
101
102
103 ---------------------
104 -- | PhyloPeriod | --
105 ---------------------
106
107
108 -- | PhyloStep : steps of phylomemy on temporal axis
109 -- Period: tuple (start date, end date) of the step of the phylomemy
110 -- Levels: levels of granularity
111 data PhyloPeriod =
112 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
113 , _phylo_periodLevels :: [PhyloLevel]
114 }
115 deriving (Generic, Show, Eq)
116
117
118 --------------------
119 -- | PhyloLevel | --
120 --------------------
121
122
123 -- | PhyloLevel : levels of phylomemy on level axis
124 -- Levels description:
125 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
126 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
127 -- Level 1: First level of clustering
128 -- Level N: Nth level of clustering
129 data PhyloLevel =
130 PhyloLevel { _phylo_levelId :: PhyloLevelId
131 , _phylo_levelGroups :: [PhyloGroup]
132 }
133 deriving (Generic, Show, Eq)
134
135
136 --------------------
137 -- | PhyloGroup | --
138 --------------------
139
140
141 -- | PhyloGroup : group of ngrams at each level and step
142 -- Label : maybe has a label as text
143 -- Ngrams: set of terms that build the group
144 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
145 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
146 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
147 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
148 data PhyloGroup =
149 PhyloGroup { _phylo_groupId :: PhyloGroupId
150 , _phylo_groupLabel :: Text
151 , _phylo_groupNgrams :: [Int]
152 , _phylo_groupNgramsMeta :: Map Text [Double]
153 , _phylo_groupMeta :: Map Text Double
154 , _phylo_groupBranchId :: Maybe PhyloBranchId
155 , _phylo_groupCooc :: !(Map (Int,Int) Double)
156
157 , _phylo_groupPeriodParents :: [Pointer]
158 , _phylo_groupPeriodChilds :: [Pointer]
159
160 , _phylo_groupLevelParents :: [Pointer]
161 , _phylo_groupLevelChilds :: [Pointer]
162 }
163 deriving (Generic, NFData, Show, Eq, Ord)
164
165 -- instance NFData PhyloGroup
166
167
168 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
169 type Level = Int
170 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
171 type Index = Int
172
173
174 type PhyloPeriodId = (Start, End)
175 type PhyloLevelId = (PhyloPeriodId, Level)
176 type PhyloGroupId = (PhyloLevelId, Index)
177 type PhyloBranchId = (Level, Index)
178
179
180 -- | Weight : A generic mesure that can be associated with an Id
181 type Weight = Double
182 -- | Pointer : A weighted linked with a given PhyloGroup
183 type Pointer = (PhyloGroupId, Weight)
184 -- | Ngrams : a contiguous sequence of n terms
185 type Ngrams = Text
186
187
188 --------------------
189 -- | Aggregates | --
190 --------------------
191
192
193 -- | Document : a piece of Text linked to a Date
194 data Document = Document
195 { date :: Date
196 , text :: [Ngrams]
197 } deriving (Show,Generic,NFData)
198
199 -- | Clique : Set of ngrams cooccurring in the same Document
200 type Clique = Set Ngrams
201 -- | Support : Number of Documents where a Clique occurs
202 type Support = Int
203 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
204 data PhyloFis = PhyloFis
205 { _phyloFis_clique :: Clique
206 , _phyloFis_support :: Support
207 , _phyloFis_period :: (Date,Date)
208 } deriving (Generic,NFData,Show,Eq)
209
210 -- | A list of clustered PhyloGroup
211 type PhyloCluster = [PhyloGroup]
212
213
214 -- | A PhyloGroup in a Graph
215 type GroupNode = PhyloGroup
216 -- | A weighted links between two PhyloGroups in a Graph
217 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
218 -- | The association as a Graph between a list of Nodes and a list of Edges
219 type GroupGraph = ([GroupNode],[GroupEdge])
220
221
222 ---------------
223 -- | Error | --
224 ---------------
225
226
227 data PhyloError = LevelDoesNotExist
228 | LevelUnassigned
229 deriving (Show)
230
231
232 -----------------
233 -- | Cluster | --
234 -----------------
235
236
237 -- | Cluster constructors
238 data Cluster = Fis FisParams
239 | RelatedComponents RCParams
240 | Louvain LouvainParams
241 deriving (Generic, Show, Eq, Read)
242
243 -- | Parameters for Fis clustering
244 data FisParams = FisParams
245 { _fis_keepMinorFis :: Bool
246 , _fis_minSupport :: Support
247 , _fis_minSize :: Int
248 } deriving (Generic, Show, Eq, Read)
249
250 -- | Parameters for RelatedComponents clustering
251 data RCParams = RCParams
252 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
253
254 -- | Parameters for Louvain clustering
255 data LouvainParams = LouvainParams
256 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
257
258
259 -------------------
260 -- | Proximity | --
261 -------------------
262
263
264 -- | Proximity constructors
265 data Proximity = WeightedLogJaccard WLJParams
266 | Hamming HammingParams
267 | Filiation
268 deriving (Generic, Show, Eq, Read)
269
270 -- | Parameters for WeightedLogJaccard proximity
271 data WLJParams = WLJParams
272 { _wlj_threshold :: Double
273 , _wlj_sensibility :: Double
274 } deriving (Generic, Show, Eq, Read)
275
276 -- | Parameters for Hamming proximity
277 data HammingParams = HammingParams
278 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
279
280
281 ----------------
282 -- | Filter | --
283 ----------------
284
285
286 -- | Filter constructors
287 data Filter = LonelyBranch LBParams
288 | SizeBranch SBParams
289 deriving (Generic, Show, Eq)
290
291 -- | Parameters for LonelyBranch filter
292 data LBParams = LBParams
293 { _lb_periodsInf :: Int
294 , _lb_periodsSup :: Int
295 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
296
297 -- | Parameters for SizeBranch filter
298 data SBParams = SBParams
299 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
300
301
302 ----------------
303 -- | Metric | --
304 ----------------
305
306
307 -- | Metric constructors
308 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
309
310
311 ----------------
312 -- | Tagger | --
313 ----------------
314
315
316 -- | Tagger constructors
317 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
318 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
319
320
321 --------------
322 -- | Sort | --
323 --------------
324
325
326 -- | Sort constructors
327 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
328 data Order = Asc | Desc deriving (Generic, Show, Read)
329
330
331 --------------------
332 -- | PhyloQuery | --
333 --------------------
334
335
336 -- | A Phyloquery describes a phylomemic reconstruction
337 data PhyloQueryBuild = PhyloQueryBuild
338 { _q_phyloTitle :: Text
339 , _q_phyloDesc :: Text
340
341 -- Grain and Steps for the PhyloPeriods
342 , _q_periodGrain :: Int
343 , _q_periodSteps :: Int
344
345 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
346 , _q_contextualUnit :: Cluster
347 , _q_contextualUnitMetrics :: [Metric]
348 , _q_contextualUnitFilters :: [Filter]
349
350 -- Inter-temporal matching method of the Phylo
351 , _q_interTemporalMatching :: Proximity
352 , _q_interTemporalMatchingFrame :: Int
353 , _q_interTemporalMatchingFrameTh :: Double
354
355 , _q_reBranchThr :: Double
356 , _q_reBranchNth :: Int
357
358 -- Last level of reconstruction
359 , _q_nthLevel :: Level
360 -- Clustering method used from level 1 to nthLevel
361 , _q_nthCluster :: Cluster
362 } deriving (Generic, Show, Eq)
363
364 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
365 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
366 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
367
368 -------------------
369 -- | PhyloView | --
370 -------------------
371
372
373 -- | A PhyloView is the output type of a Phylo
374 data PhyloView = PhyloView
375 { _pv_param :: PhyloParam
376 , _pv_title :: Text
377 , _pv_description :: Text
378 , _pv_filiation :: Filiation
379 , _pv_level :: Level
380 , _pv_periods :: [PhyloPeriodId]
381 , _pv_metrics :: Map Text [Double]
382 , _pv_branches :: [PhyloBranch]
383 , _pv_nodes :: [PhyloNode]
384 , _pv_edges :: [PhyloEdge]
385 } deriving (Generic, Show)
386
387 -- | A phyloview is made of PhyloBranches, edges and nodes
388 data PhyloBranch = PhyloBranch
389 { _pb_id :: PhyloBranchId
390 , _pb_peak :: Text
391 , _pb_metrics :: Map Text [Double]
392 } deriving (Generic, Show)
393
394 data PhyloEdge = PhyloEdge
395 { _pe_source :: PhyloGroupId
396 , _pe_target :: PhyloGroupId
397 , _pe_type :: EdgeType
398 , _pe_weight :: Weight
399 } deriving (Generic, Show)
400
401 data PhyloNode = PhyloNode
402 { _pn_id :: PhyloGroupId
403 , _pn_bid :: Maybe PhyloBranchId
404 , _pn_label :: Text
405 , _pn_idx :: [Int]
406 , _pn_ngrams :: Maybe [Ngrams]
407 , _pn_metrics :: Map Text [Double]
408 , _pn_cooc :: Map (Int,Int) Double
409 , _pn_parents :: Maybe [PhyloGroupId]
410 , _pn_childs :: [PhyloNode]
411 } deriving (Generic, Show)
412
413 ------------------------
414 -- | PhyloQueryView | --
415 ------------------------
416
417
418 data ExportMode = Json | Dot | Svg
419 deriving (Generic, Show, Read)
420 data DisplayMode = Flat | Nested
421 deriving (Generic, Show, Read)
422
423 -- | A PhyloQueryView describes a Phylo as an output view
424 data PhyloQueryView = PhyloQueryView
425 { _qv_lvl :: Level
426
427 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
428 , _qv_filiation :: Filiation
429
430 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
431 , _qv_levelChilds :: Bool
432 , _qv_levelChildsDepth :: Level
433
434 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
435 -- Firstly the metrics, then the filters and the taggers
436 , _qv_metrics :: [Metric]
437 , _qv_filters :: [Filter]
438 , _qv_taggers :: [Tagger]
439
440 -- An asc or desc sort to apply to the PhyloGraph
441 , _qv_sort :: Maybe (Sort,Order)
442
443 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
444 , _qv_export :: ExportMode
445 , _qv_display :: DisplayMode
446 , _qv_verbose :: Bool
447 }
448
449
450 ----------------
451 -- | Lenses | --
452 ----------------
453
454
455 makeLenses ''PhyloParam
456 makeLenses ''Software
457 --
458 makeLenses ''Phylo
459 makeLenses ''PhyloFoundations
460 makeLenses ''PhyloGroup
461 makeLenses ''PhyloLevel
462 makeLenses ''PhyloPeriod
463 makeLenses ''PhyloFis
464 --
465 makeLenses ''Proximity
466 makeLenses ''Cluster
467 makeLenses ''Filter
468 --
469 makeLenses ''PhyloQueryBuild
470 makeLenses ''PhyloQueryView
471 --
472 makeLenses ''PhyloView
473 makeLenses ''PhyloBranch
474 makeLenses ''PhyloNode
475 makeLenses ''PhyloEdge
476
477
478 ------------------------
479 -- | JSON instances | --
480 ------------------------
481
482
483 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
484 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
485 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
486 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
487 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
488 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
489 --
490 $(deriveJSON (unPrefix "_software_" ) ''Software )
491 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
492 --
493 $(deriveJSON defaultOptions ''Filter )
494 $(deriveJSON defaultOptions ''Metric )
495 $(deriveJSON defaultOptions ''Cluster )
496 $(deriveJSON defaultOptions ''Proximity )
497 --
498 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
499 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
500 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
501 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
502 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
503 --
504 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
505 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
506 --
507 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
508 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
509 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
510 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
511 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
512
513 $(deriveJSON defaultOptions ''Filiation )
514 $(deriveJSON defaultOptions ''EdgeType )
515
516 ---------------------------
517 -- | Swagger instances | --
518 ---------------------------
519
520 instance ToSchema Phylo where
521 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
522 instance ToSchema PhyloFoundations where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
524 instance ToSchema PhyloPeriod where
525 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
526 instance ToSchema PhyloLevel where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
528 instance ToSchema PhyloGroup where
529 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
530 instance ToSchema PhyloFis where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
532 instance ToSchema Software where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
534 instance ToSchema PhyloParam where
535 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
536 instance ToSchema Filter
537 instance ToSchema Metric
538 instance ToSchema Cluster
539 instance ToSchema Proximity where
540 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
541 instance ToSchema FisParams where
542 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
543 instance ToSchema HammingParams where
544 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
545 instance ToSchema LouvainParams where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
547 instance ToSchema RCParams where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
549 instance ToSchema WLJParams where
550 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
551 instance ToSchema LBParams where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
553 instance ToSchema SBParams where
554 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
555 instance ToSchema PhyloQueryBuild where
556 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
557 instance ToSchema PhyloView where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
559 instance ToSchema PhyloBranch where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
561 instance ToSchema PhyloEdge where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
563 instance ToSchema PhyloNode where
564 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
565 instance ToSchema Filiation
566 instance ToSchema EdgeType
567
568 ----------------------------
569 -- | TODO XML instances | --
570 ----------------------------
571