]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
[FACTORING] G.Text.Terms.
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.DeepSeq
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Map (Map)
36 import Data.Maybe (Maybe)
37 import Data.Set (Set)
38 import Data.Swagger
39 import Data.Text (Text)
40 import Data.Vector (Vector)
41 import GHC.Generics (Generic)
42 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
43 import Gargantext.Prelude
44 import Gargantext.Text.Context (TermList)
45 import Prelude (Bounded)
46
47 --------------------
48 -- | PhyloParam | --
49 --------------------
50
51
52 -- | Global parameters of a Phylo
53 data PhyloParam =
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
58
59
60 -- | Software parameters
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
65
66
67 ---------------
68 -- | Phylo | --
69 ---------------
70
71
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
76 data Phylo =
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
82 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
83 , _phylo_param :: PhyloParam
84 }
85 deriving (Generic, Show, Eq)
86
87
88 -- | The foundations of a phylomemy created from a given TermList
89 data PhyloFoundations =
90 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
91 , _phylo_foundationsTermsList :: TermList
92 } deriving (Generic, Show, Eq)
93
94
95 -- | Date : a simple Integer
96 type Date = Int
97
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
101 type Start = Date
102 type End = Date
103
104
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
108
109
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
113 data PhyloPeriod =
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
116 }
117 deriving (Generic, Show, Eq)
118
119
120 --------------------
121 -- | PhyloLevel | --
122 --------------------
123
124
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
131 data PhyloLevel =
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
134 }
135 deriving (Generic, Show, Eq)
136
137
138 --------------------
139 -- | PhyloGroup | --
140 --------------------
141
142
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 data PhyloGroup =
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupNgramsMeta :: Map Text [Double]
155 , _phylo_groupMeta :: Map Text Double
156 , _phylo_groupBranchId :: Maybe PhyloBranchId
157 , _phylo_groupCooc :: !(Map (Int,Int) Double)
158
159 , _phylo_groupPeriodParents :: [Pointer]
160 , _phylo_groupPeriodChilds :: [Pointer]
161
162 , _phylo_groupLevelParents :: [Pointer]
163 , _phylo_groupLevelChilds :: [Pointer]
164 }
165 deriving (Generic, NFData, Show, Eq, Ord)
166
167 -- instance NFData PhyloGroup
168
169
170 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
171 type Level = Int
172 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
173 type Index = Int
174
175
176 type PhyloPeriodId = (Start, End)
177 type PhyloLevelId = (PhyloPeriodId, Level)
178 type PhyloGroupId = (PhyloLevelId, Index)
179 type PhyloBranchId = (Level, Index)
180
181
182 -- | Weight : A generic mesure that can be associated with an Id
183 type Weight = Double
184 -- | Pointer : A weighted linked with a given PhyloGroup
185 type Pointer = (PhyloGroupId, Weight)
186 -- | Ngrams : a contiguous sequence of n terms
187 type Ngrams = Text
188
189
190 --------------------
191 -- | Aggregates | --
192 --------------------
193
194
195 -- | Document : a piece of Text linked to a Date
196 data Document = Document
197 { date :: Date
198 , text :: [Ngrams]
199 } deriving (Show,Generic,NFData)
200
201 -- | Clique : Set of ngrams cooccurring in the same Document
202 type Clique = Set Ngrams
203 -- | Support : Number of Documents where a Clique occurs
204 type Support = Int
205 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
206 data PhyloFis = PhyloFis
207 { _phyloFis_clique :: Clique
208 , _phyloFis_support :: Support
209 , _phyloFis_period :: (Date,Date)
210 } deriving (Generic,NFData,Show,Eq)
211
212 -- | A list of clustered PhyloGroup
213 type PhyloCluster = [PhyloGroup]
214
215
216 -- | A PhyloGroup in a Graph
217 type GroupNode = PhyloGroup
218 -- | A weighted links between two PhyloGroups in a Graph
219 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
220 -- | The association as a Graph between a list of Nodes and a list of Edges
221 type GroupGraph = ([GroupNode],[GroupEdge])
222
223
224 ---------------
225 -- | Error | --
226 ---------------
227
228
229 data PhyloError = LevelDoesNotExist
230 | LevelUnassigned
231 deriving (Show)
232
233
234 -----------------
235 -- | Cluster | --
236 -----------------
237
238
239 -- | Cluster constructors
240 data Cluster = Fis FisParams
241 | RelatedComponents RCParams
242 | Louvain LouvainParams
243 deriving (Generic, Show, Eq, Read)
244
245 -- | Parameters for Fis clustering
246 data FisParams = FisParams
247 { _fis_keepMinorFis :: Bool
248 , _fis_minSupport :: Support
249 , _fis_minSize :: Int
250 } deriving (Generic, Show, Eq, Read)
251
252 -- | Parameters for RelatedComponents clustering
253 data RCParams = RCParams
254 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
255
256 -- | Parameters for Louvain clustering
257 data LouvainParams = LouvainParams
258 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
259
260
261 -------------------
262 -- | Proximity | --
263 -------------------
264
265
266 -- | Proximity constructors
267 data Proximity = WeightedLogJaccard WLJParams
268 | Hamming HammingParams
269 | Filiation
270 deriving (Generic, Show, Eq, Read)
271
272 -- | Parameters for WeightedLogJaccard proximity
273 data WLJParams = WLJParams
274 { _wlj_threshold :: Double
275 , _wlj_sensibility :: Double
276 } deriving (Generic, Show, Eq, Read)
277
278 -- | Parameters for Hamming proximity
279 data HammingParams = HammingParams
280 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
281
282
283 ----------------
284 -- | Filter | --
285 ----------------
286
287
288 -- | Filter constructors
289 data Filter = LonelyBranch LBParams
290 | SizeBranch SBParams
291 deriving (Generic, Show, Eq)
292
293 -- | Parameters for LonelyBranch filter
294 data LBParams = LBParams
295 { _lb_periodsInf :: Int
296 , _lb_periodsSup :: Int
297 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
298
299 -- | Parameters for SizeBranch filter
300 data SBParams = SBParams
301 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
302
303
304 ----------------
305 -- | Metric | --
306 ----------------
307
308
309 -- | Metric constructors
310 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
311
312
313 ----------------
314 -- | Tagger | --
315 ----------------
316
317
318 -- | Tagger constructors
319 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
320 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
321
322
323 --------------
324 -- | Sort | --
325 --------------
326
327
328 -- | Sort constructors
329 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
330 data Order = Asc | Desc deriving (Generic, Show, Read)
331
332
333 --------------------
334 -- | PhyloQuery | --
335 --------------------
336
337
338 -- | A Phyloquery describes a phylomemic reconstruction
339 data PhyloQueryBuild = PhyloQueryBuild
340 { _q_phyloTitle :: Text
341 , _q_phyloDesc :: Text
342
343 -- Grain and Steps for the PhyloPeriods
344 , _q_periodGrain :: Int
345 , _q_periodSteps :: Int
346
347 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
348 , _q_contextualUnit :: Cluster
349 , _q_contextualUnitMetrics :: [Metric]
350 , _q_contextualUnitFilters :: [Filter]
351
352 -- Inter-temporal matching method of the Phylo
353 , _q_interTemporalMatching :: Proximity
354 , _q_interTemporalMatchingFrame :: Int
355 , _q_interTemporalMatchingFrameTh :: Double
356
357 , _q_reBranchThr :: Double
358 , _q_reBranchNth :: Int
359
360 -- Last level of reconstruction
361 , _q_nthLevel :: Level
362 -- Clustering method used from level 1 to nthLevel
363 , _q_nthCluster :: Cluster
364 } deriving (Generic, Show, Eq)
365
366 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
367 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
368 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
369
370 -------------------
371 -- | PhyloView | --
372 -------------------
373
374
375 -- | A PhyloView is the output type of a Phylo
376 data PhyloView = PhyloView
377 { _pv_param :: PhyloParam
378 , _pv_title :: Text
379 , _pv_description :: Text
380 , _pv_filiation :: Filiation
381 , _pv_level :: Level
382 , _pv_periods :: [PhyloPeriodId]
383 , _pv_metrics :: Map Text [Double]
384 , _pv_branches :: [PhyloBranch]
385 , _pv_nodes :: [PhyloNode]
386 , _pv_edges :: [PhyloEdge]
387 } deriving (Generic, Show)
388
389 -- | A phyloview is made of PhyloBranches, edges and nodes
390 data PhyloBranch = PhyloBranch
391 { _pb_id :: PhyloBranchId
392 , _pb_peak :: Text
393 , _pb_metrics :: Map Text [Double]
394 } deriving (Generic, Show)
395
396 data PhyloEdge = PhyloEdge
397 { _pe_source :: PhyloGroupId
398 , _pe_target :: PhyloGroupId
399 , _pe_type :: EdgeType
400 , _pe_weight :: Weight
401 } deriving (Generic, Show)
402
403 data PhyloNode = PhyloNode
404 { _pn_id :: PhyloGroupId
405 , _pn_bid :: Maybe PhyloBranchId
406 , _pn_label :: Text
407 , _pn_idx :: [Int]
408 , _pn_ngrams :: Maybe [Ngrams]
409 , _pn_metrics :: Map Text [Double]
410 , _pn_cooc :: Map (Int,Int) Double
411 , _pn_parents :: Maybe [PhyloGroupId]
412 , _pn_childs :: [PhyloNode]
413 } deriving (Generic, Show)
414
415 ------------------------
416 -- | PhyloQueryView | --
417 ------------------------
418
419
420 data ExportMode = Json | Dot | Svg
421 deriving (Generic, Show, Read)
422 data DisplayMode = Flat | Nested
423 deriving (Generic, Show, Read)
424
425 -- | A PhyloQueryView describes a Phylo as an output view
426 data PhyloQueryView = PhyloQueryView
427 { _qv_lvl :: Level
428
429 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
430 , _qv_filiation :: Filiation
431
432 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
433 , _qv_levelChilds :: Bool
434 , _qv_levelChildsDepth :: Level
435
436 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
437 -- Firstly the metrics, then the filters and the taggers
438 , _qv_metrics :: [Metric]
439 , _qv_filters :: [Filter]
440 , _qv_taggers :: [Tagger]
441
442 -- An asc or desc sort to apply to the PhyloGraph
443 , _qv_sort :: Maybe (Sort,Order)
444
445 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
446 , _qv_export :: ExportMode
447 , _qv_display :: DisplayMode
448 , _qv_verbose :: Bool
449 }
450
451
452 ----------------
453 -- | Lenses | --
454 ----------------
455
456
457 makeLenses ''PhyloParam
458 makeLenses ''Software
459 --
460 makeLenses ''Phylo
461 makeLenses ''PhyloFoundations
462 makeLenses ''PhyloGroup
463 makeLenses ''PhyloLevel
464 makeLenses ''PhyloPeriod
465 makeLenses ''PhyloFis
466 --
467 makeLenses ''Proximity
468 makeLenses ''Cluster
469 makeLenses ''Filter
470 --
471 makeLenses ''PhyloQueryBuild
472 makeLenses ''PhyloQueryView
473 --
474 makeLenses ''PhyloView
475 makeLenses ''PhyloBranch
476 makeLenses ''PhyloNode
477 makeLenses ''PhyloEdge
478
479
480 ------------------------
481 -- | JSON instances | --
482 ------------------------
483
484
485 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
486 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
487 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
488 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
489 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
490 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
491 --
492 $(deriveJSON (unPrefix "_software_" ) ''Software )
493 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
494 --
495 $(deriveJSON defaultOptions ''Filter )
496 $(deriveJSON defaultOptions ''Metric )
497 $(deriveJSON defaultOptions ''Cluster )
498 $(deriveJSON defaultOptions ''Proximity )
499 --
500 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
501 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
502 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
503 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
504 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
505 --
506 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
507 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
508 --
509 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
510 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
511 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
512 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
513 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
514
515 $(deriveJSON defaultOptions ''Filiation )
516 $(deriveJSON defaultOptions ''EdgeType )
517
518 ---------------------------
519 -- | Swagger instances | --
520 ---------------------------
521
522 instance ToSchema Phylo where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
524 instance ToSchema PhyloFoundations where
525 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
526 instance ToSchema PhyloPeriod where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
528 instance ToSchema PhyloLevel where
529 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
530 instance ToSchema PhyloGroup where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
532 instance ToSchema PhyloFis where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
534 instance ToSchema Software where
535 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
536 instance ToSchema PhyloParam where
537 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
538 instance ToSchema Filter
539 instance ToSchema Metric
540 instance ToSchema Cluster
541 instance ToSchema Proximity where
542 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
543 instance ToSchema FisParams where
544 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
545 instance ToSchema HammingParams where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
547 instance ToSchema LouvainParams where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
549 instance ToSchema RCParams where
550 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
551 instance ToSchema WLJParams where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
553 instance ToSchema LBParams where
554 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
555 instance ToSchema SBParams where
556 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
557 instance ToSchema PhyloQueryBuild where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
559 instance ToSchema PhyloView where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
561 instance ToSchema PhyloBranch where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
563 instance ToSchema PhyloEdge where
564 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
565 instance ToSchema PhyloNode where
566 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
567 instance ToSchema Filiation
568 instance ToSchema EdgeType
569
570 ----------------------------
571 -- | TODO XML instances | --
572 ----------------------------
573