]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
add parallelism
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 import Control.DeepSeq
48
49 --------------------
50 -- | PhyloParam | --
51 --------------------
52
53
54 -- | Global parameters of a Phylo
55 data PhyloParam =
56 PhyloParam { _phyloParam_version :: Text -- Double ?
57 , _phyloParam_software :: Software
58 , _phyloParam_query :: PhyloQueryBuild
59 } deriving (Generic, Show, Eq)
60
61
62 -- | Software parameters
63 data Software =
64 Software { _software_name :: Text
65 , _software_version :: Text
66 } deriving (Generic, Show, Eq)
67
68
69 ---------------
70 -- | Phylo | --
71 ---------------
72
73
74 -- | Phylo datatype of a phylomemy
75 -- Duration : time Segment of the whole Phylo
76 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
77 -- Periods : list of all the periods of a Phylo
78 data Phylo =
79 Phylo { _phylo_duration :: (Start, End)
80 , _phylo_foundations :: PhyloFoundations
81 , _phylo_periods :: [PhyloPeriod]
82 , _phylo_docsByYears :: Map Date Double
83 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
84 , _phylo_fis :: Map (Date,Date) [PhyloFis]
85 , _phylo_param :: PhyloParam
86 }
87 deriving (Generic, Show, Eq)
88
89
90 -- | The foundations of a phylomemy created from a given TermList
91 data PhyloFoundations =
92 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
93 , _phylo_foundationsTermsList :: TermList
94 } deriving (Generic, Show, Eq)
95
96
97 -- | Date : a simple Integer
98 type Date = Int
99
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
103 type Start = Date
104 type End = Date
105
106
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
110
111
112 -- | PhyloStep : steps of phylomemy on temporal axis
113 -- Period: tuple (start date, end date) of the step of the phylomemy
114 -- Levels: levels of granularity
115 data PhyloPeriod =
116 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
117 , _phylo_periodLevels :: [PhyloLevel]
118 }
119 deriving (Generic, Show, Eq)
120
121
122 --------------------
123 -- | PhyloLevel | --
124 --------------------
125
126
127 -- | PhyloLevel : levels of phylomemy on level axis
128 -- Levels description:
129 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
130 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
131 -- Level 1: First level of clustering
132 -- Level N: Nth level of clustering
133 data PhyloLevel =
134 PhyloLevel { _phylo_levelId :: PhyloLevelId
135 , _phylo_levelGroups :: [PhyloGroup]
136 }
137 deriving (Generic, Show, Eq)
138
139
140 --------------------
141 -- | PhyloGroup | --
142 --------------------
143
144
145 -- | PhyloGroup : group of ngrams at each level and step
146 -- Label : maybe has a label as text
147 -- Ngrams: set of terms that build the group
148 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
149 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
150 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
151 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
152 data PhyloGroup =
153 PhyloGroup { _phylo_groupId :: PhyloGroupId
154 , _phylo_groupLabel :: Text
155 , _phylo_groupNgrams :: [Int]
156 , _phylo_groupMeta :: Map Text Double
157 , _phylo_groupBranchId :: Maybe PhyloBranchId
158 , _phylo_groupCooc :: Map (Int,Int) Double
159
160 , _phylo_groupPeriodParents :: [Pointer]
161 , _phylo_groupPeriodChilds :: [Pointer]
162
163 , _phylo_groupLevelParents :: [Pointer]
164 , _phylo_groupLevelChilds :: [Pointer]
165 }
166 deriving (Generic, NFData, Show, Eq, Ord)
167
168 -- instance NFData PhyloGroup
169
170
171 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
172 type Level = Int
173 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
174 type Index = Int
175
176
177 type PhyloPeriodId = (Start, End)
178 type PhyloLevelId = (PhyloPeriodId, Level)
179 type PhyloGroupId = (PhyloLevelId, Index)
180 type PhyloBranchId = (Level, Index)
181
182
183 -- | Weight : A generic mesure that can be associated with an Id
184 type Weight = Double
185 -- | Pointer : A weighted linked with a given PhyloGroup
186 type Pointer = (PhyloGroupId, Weight)
187 -- | Ngrams : a contiguous sequence of n terms
188 type Ngrams = Text
189
190
191 --------------------
192 -- | Aggregates | --
193 --------------------
194
195
196 -- | Document : a piece of Text linked to a Date
197 data Document = Document
198 { date :: Date
199 , text :: [Ngrams]
200 } deriving (Show,Generic)
201
202 -- | Clique : Set of ngrams cooccurring in the same Document
203 type Clique = Set Ngrams
204 -- | Support : Number of Documents where a Clique occurs
205 type Support = Int
206 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
207 data PhyloFis = PhyloFis
208 { _phyloFis_clique :: Clique
209 , _phyloFis_support :: Support
210 , _phyloFis_period :: (Date,Date)
211 } deriving (Generic,Show,Eq)
212
213 -- | A list of clustered PhyloGroup
214 type PhyloCluster = [PhyloGroup]
215
216
217 -- | A PhyloGroup in a Graph
218 type GroupNode = PhyloGroup
219 -- | A weighted links between two PhyloGroups in a Graph
220 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
221 -- | The association as a Graph between a list of Nodes and a list of Edges
222 type GroupGraph = ([GroupNode],[GroupEdge])
223
224
225 ---------------
226 -- | Error | --
227 ---------------
228
229
230 data PhyloError = LevelDoesNotExist
231 | LevelUnassigned
232 deriving (Show)
233
234
235 -----------------
236 -- | Cluster | --
237 -----------------
238
239
240 -- | Cluster constructors
241 data Cluster = Fis FisParams
242 | RelatedComponents RCParams
243 | Louvain LouvainParams
244 deriving (Generic, Show, Eq, Read)
245
246 -- | Parameters for Fis clustering
247 data FisParams = FisParams
248 { _fis_keepMinorFis :: Bool
249 , _fis_minSupport :: Support
250 , _fis_minSize :: Int
251 } deriving (Generic, Show, Eq, Read)
252
253 -- | Parameters for RelatedComponents clustering
254 data RCParams = RCParams
255 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
256
257 -- | Parameters for Louvain clustering
258 data LouvainParams = LouvainParams
259 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
260
261
262 -------------------
263 -- | Proximity | --
264 -------------------
265
266
267 -- | Proximity constructors
268 data Proximity = WeightedLogJaccard WLJParams
269 | Hamming HammingParams
270 | Filiation
271 deriving (Generic, Show, Eq, Read)
272
273 -- | Parameters for WeightedLogJaccard proximity
274 data WLJParams = WLJParams
275 { _wlj_threshold :: Double
276 , _wlj_sensibility :: Double
277 } deriving (Generic, Show, Eq, Read)
278
279 -- | Parameters for Hamming proximity
280 data HammingParams = HammingParams
281 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
282
283
284 ----------------
285 -- | Filter | --
286 ----------------
287
288
289 -- | Filter constructors
290 data Filter = LonelyBranch LBParams
291 | SizeBranch SBParams
292 deriving (Generic, Show, Eq)
293
294 -- | Parameters for LonelyBranch filter
295 data LBParams = LBParams
296 { _lb_periodsInf :: Int
297 , _lb_periodsSup :: Int
298 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
299
300 -- | Parameters for SizeBranch filter
301 data SBParams = SBParams
302 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
303
304
305 ----------------
306 -- | Metric | --
307 ----------------
308
309
310 -- | Metric constructors
311 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
312
313
314 ----------------
315 -- | Tagger | --
316 ----------------
317
318
319 -- | Tagger constructors
320 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
321
322
323 --------------
324 -- | Sort | --
325 --------------
326
327
328 -- | Sort constructors
329 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
330 data Order = Asc | Desc deriving (Generic, Show, Read)
331
332
333 --------------------
334 -- | PhyloQuery | --
335 --------------------
336
337
338 -- | A Phyloquery describes a phylomemic reconstruction
339 data PhyloQueryBuild = PhyloQueryBuild
340 { _q_phyloTitle :: Text
341 , _q_phyloDesc :: Text
342
343 -- Grain and Steps for the PhyloPeriods
344 , _q_periodGrain :: Int
345 , _q_periodSteps :: Int
346
347 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
348 , _q_contextualUnit :: Cluster
349 , _q_contextualUnitMetrics :: [Metric]
350 , _q_contextualUnitFilters :: [Filter]
351
352 -- Inter-temporal matching method of the Phylo
353 , _q_interTemporalMatching :: Proximity
354 , _q_interTemporalMatchingFrame :: Int
355
356 , _q_reBranchThr :: Double
357 , _q_reBranchNth :: Int
358
359 -- Last level of reconstruction
360 , _q_nthLevel :: Level
361 -- Clustering method used from level 1 to nthLevel
362 , _q_nthCluster :: Cluster
363 } deriving (Generic, Show, Eq)
364
365 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
366 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
367 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
368
369 -------------------
370 -- | PhyloView | --
371 -------------------
372
373
374 -- | A PhyloView is the output type of a Phylo
375 data PhyloView = PhyloView
376 { _pv_param :: PhyloParam
377 , _pv_title :: Text
378 , _pv_description :: Text
379 , _pv_filiation :: Filiation
380 , _pv_level :: Level
381 , _pv_periods :: [PhyloPeriodId]
382 , _pv_metrics :: Map Text [Double]
383 , _pv_branches :: [PhyloBranch]
384 , _pv_nodes :: [PhyloNode]
385 , _pv_edges :: [PhyloEdge]
386 } deriving (Generic, Show)
387
388 -- | A phyloview is made of PhyloBranches, edges and nodes
389 data PhyloBranch = PhyloBranch
390 { _pb_id :: PhyloBranchId
391 , _pb_peak :: Text
392 , _pb_metrics :: Map Text [Double]
393 } deriving (Generic, Show)
394
395 data PhyloEdge = PhyloEdge
396 { _pe_source :: PhyloGroupId
397 , _pe_target :: PhyloGroupId
398 , _pe_type :: EdgeType
399 , _pe_weight :: Weight
400 } deriving (Generic, Show)
401
402 data PhyloNode = PhyloNode
403 { _pn_id :: PhyloGroupId
404 , _pn_bid :: Maybe PhyloBranchId
405 , _pn_label :: Text
406 , _pn_idx :: [Int]
407 , _pn_ngrams :: Maybe [Ngrams]
408 , _pn_metrics :: Map Text [Double]
409 , _pn_parents :: Maybe [PhyloGroupId]
410 , _pn_childs :: [PhyloNode]
411 } deriving (Generic, Show)
412
413 ------------------------
414 -- | PhyloQueryView | --
415 ------------------------
416
417
418 data ExportMode = Json | Dot | Svg
419 deriving (Generic, Show, Read)
420 data DisplayMode = Flat | Nested
421 deriving (Generic, Show, Read)
422
423 -- | A PhyloQueryView describes a Phylo as an output view
424 data PhyloQueryView = PhyloQueryView
425 { _qv_lvl :: Level
426
427 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
428 , _qv_filiation :: Filiation
429
430 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
431 , _qv_levelChilds :: Bool
432 , _qv_levelChildsDepth :: Level
433
434 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
435 -- Firstly the metrics, then the filters and the taggers
436 , _qv_metrics :: [Metric]
437 , _qv_filters :: [Filter]
438 , _qv_taggers :: [Tagger]
439
440 -- An asc or desc sort to apply to the PhyloGraph
441 , _qv_sort :: Maybe (Sort,Order)
442
443 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
444 , _qv_export :: ExportMode
445 , _qv_display :: DisplayMode
446 , _qv_verbose :: Bool
447 }
448
449
450 ----------------
451 -- | Lenses | --
452 ----------------
453
454
455 makeLenses ''PhyloParam
456 makeLenses ''Software
457 --
458 makeLenses ''Phylo
459 makeLenses ''PhyloFoundations
460 makeLenses ''PhyloGroup
461 makeLenses ''PhyloLevel
462 makeLenses ''PhyloPeriod
463 makeLenses ''PhyloFis
464 --
465 makeLenses ''Proximity
466 makeLenses ''Cluster
467 makeLenses ''Filter
468 --
469 makeLenses ''PhyloQueryBuild
470 makeLenses ''PhyloQueryView
471 --
472 makeLenses ''PhyloView
473 makeLenses ''PhyloBranch
474 makeLenses ''PhyloNode
475 makeLenses ''PhyloEdge
476
477
478 ------------------------
479 -- | JSON instances | --
480 ------------------------
481
482
483 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
484 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
485 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
486 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
487 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
488 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
489 --
490 $(deriveJSON (unPrefix "_software_" ) ''Software )
491 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
492 --
493 $(deriveJSON defaultOptions ''Filter )
494 $(deriveJSON defaultOptions ''Metric )
495 $(deriveJSON defaultOptions ''Cluster )
496 $(deriveJSON defaultOptions ''Proximity )
497 --
498 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
499 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
500 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
501 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
502 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
503 --
504 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
505 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
506 --
507 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
508 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
509 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
510 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
511 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
512
513 $(deriveJSON defaultOptions ''Filiation )
514 $(deriveJSON defaultOptions ''EdgeType )
515
516
517 ----------------------------
518 -- | TODO XML instances | --
519 ----------------------------
520