]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Merge branch 'dev-phylo' into dev-merge
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 import Control.DeepSeq
48
49 --------------------
50 -- | PhyloParam | --
51 --------------------
52
53
54 -- | Global parameters of a Phylo
55 data PhyloParam =
56 PhyloParam { _phyloParam_version :: Text -- Double ?
57 , _phyloParam_software :: Software
58 , _phyloParam_query :: PhyloQueryBuild
59 } deriving (Generic, Show, Eq)
60
61
62 -- | Software parameters
63 data Software =
64 Software { _software_name :: Text
65 , _software_version :: Text
66 } deriving (Generic, Show, Eq)
67
68
69 ---------------
70 -- | Phylo | --
71 ---------------
72
73
74 -- | Phylo datatype of a phylomemy
75 -- Duration : time Segment of the whole Phylo
76 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
77 -- Periods : list of all the periods of a Phylo
78 data Phylo =
79 Phylo { _phylo_duration :: (Start, End)
80 , _phylo_foundations :: PhyloFoundations
81 , _phylo_periods :: [PhyloPeriod]
82 , _phylo_docsByYears :: Map Date Double
83 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
84 , _phylo_fis :: Map (Date,Date) [PhyloFis]
85 , _phylo_param :: PhyloParam
86 }
87 deriving (Generic, Show, Eq)
88
89
90 -- | The foundations of a phylomemy created from a given TermList
91 data PhyloFoundations =
92 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
93 , _phylo_foundationsTermsList :: TermList
94 } deriving (Generic, Show, Eq)
95
96
97 -- | Date : a simple Integer
98 type Date = Int
99
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
103 type Start = Date
104 type End = Date
105
106
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
110
111
112 -- | PhyloStep : steps of phylomemy on temporal axis
113 -- Period: tuple (start date, end date) of the step of the phylomemy
114 -- Levels: levels of granularity
115 data PhyloPeriod =
116 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
117 , _phylo_periodLevels :: [PhyloLevel]
118 }
119 deriving (Generic, Show, Eq)
120
121
122 --------------------
123 -- | PhyloLevel | --
124 --------------------
125
126
127 -- | PhyloLevel : levels of phylomemy on level axis
128 -- Levels description:
129 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
130 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
131 -- Level 1: First level of clustering
132 -- Level N: Nth level of clustering
133 data PhyloLevel =
134 PhyloLevel { _phylo_levelId :: PhyloLevelId
135 , _phylo_levelGroups :: [PhyloGroup]
136 }
137 deriving (Generic, Show, Eq)
138
139
140 --------------------
141 -- | PhyloGroup | --
142 --------------------
143
144
145 -- | PhyloGroup : group of ngrams at each level and step
146 -- Label : maybe has a label as text
147 -- Ngrams: set of terms that build the group
148 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
149 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
150 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
151 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
152 data PhyloGroup =
153 PhyloGroup { _phylo_groupId :: PhyloGroupId
154 , _phylo_groupLabel :: Text
155 , _phylo_groupNgrams :: [Int]
156 , _phylo_groupMeta :: Map Text Double
157 , _phylo_groupBranchId :: Maybe PhyloBranchId
158 , _phylo_groupCooc :: Map (Int,Int) Double
159
160 , _phylo_groupPeriodParents :: [Pointer]
161 , _phylo_groupPeriodChilds :: [Pointer]
162
163 , _phylo_groupLevelParents :: [Pointer]
164 , _phylo_groupLevelChilds :: [Pointer]
165 }
166 deriving (Generic, NFData, Show, Eq, Ord)
167
168 -- instance NFData PhyloGroup
169
170
171 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
172 type Level = Int
173 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
174 type Index = Int
175
176
177 type PhyloPeriodId = (Start, End)
178 type PhyloLevelId = (PhyloPeriodId, Level)
179 type PhyloGroupId = (PhyloLevelId, Index)
180 type PhyloBranchId = (Level, Index)
181
182
183 -- | Weight : A generic mesure that can be associated with an Id
184 type Weight = Double
185 -- | Pointer : A weighted linked with a given PhyloGroup
186 type Pointer = (PhyloGroupId, Weight)
187 -- | Ngrams : a contiguous sequence of n terms
188 type Ngrams = Text
189
190
191 --------------------
192 -- | Aggregates | --
193 --------------------
194
195
196 -- | Document : a piece of Text linked to a Date
197 data Document = Document
198 { date :: Date
199 , text :: [Ngrams]
200 } deriving (Show,Generic)
201
202 -- | Clique : Set of ngrams cooccurring in the same Document
203 type Clique = Set Ngrams
204 -- | Support : Number of Documents where a Clique occurs
205 type Support = Int
206 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
207 data PhyloFis = PhyloFis
208 { _phyloFis_clique :: Clique
209 , _phyloFis_support :: Support
210 , _phyloFis_period :: (Date,Date)
211 } deriving (Generic,Show,Eq)
212
213 -- | A list of clustered PhyloGroup
214 type PhyloCluster = [PhyloGroup]
215
216
217 -- | A PhyloGroup in a Graph
218 type GroupNode = PhyloGroup
219 -- | A weighted links between two PhyloGroups in a Graph
220 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
221 -- | The association as a Graph between a list of Nodes and a list of Edges
222 type GroupGraph = ([GroupNode],[GroupEdge])
223
224
225 ---------------
226 -- | Error | --
227 ---------------
228
229
230 data PhyloError = LevelDoesNotExist
231 | LevelUnassigned
232 deriving (Show)
233
234
235 -----------------
236 -- | Cluster | --
237 -----------------
238
239
240 -- | Cluster constructors
241 data Cluster = Fis FisParams
242 | RelatedComponents RCParams
243 | Louvain LouvainParams
244 deriving (Generic, Show, Eq, Read)
245
246 -- | Parameters for Fis clustering
247 data FisParams = FisParams
248 { _fis_keepMinorFis :: Bool
249 , _fis_minSupport :: Support
250 , _fis_minSize :: Int
251 } deriving (Generic, Show, Eq, Read)
252
253 -- | Parameters for RelatedComponents clustering
254 data RCParams = RCParams
255 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
256
257 -- | Parameters for Louvain clustering
258 data LouvainParams = LouvainParams
259 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
260
261
262 -------------------
263 -- | Proximity | --
264 -------------------
265
266
267 -- | Proximity constructors
268 data Proximity = WeightedLogJaccard WLJParams
269 | Hamming HammingParams
270 | Filiation
271 deriving (Generic, Show, Eq, Read)
272
273 -- | Parameters for WeightedLogJaccard proximity
274 data WLJParams = WLJParams
275 { _wlj_threshold :: Double
276 , _wlj_sensibility :: Double
277 } deriving (Generic, Show, Eq, Read)
278
279 -- | Parameters for Hamming proximity
280 data HammingParams = HammingParams
281 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
282
283
284 ----------------
285 -- | Filter | --
286 ----------------
287
288
289 -- | Filter constructors
290 data Filter = LonelyBranch LBParams
291 | SizeBranch SBParams
292 deriving (Generic, Show, Eq)
293
294 -- | Parameters for LonelyBranch filter
295 data LBParams = LBParams
296 { _lb_periodsInf :: Int
297 , _lb_periodsSup :: Int
298 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
299
300 -- | Parameters for SizeBranch filter
301 data SBParams = SBParams
302 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
303
304
305 ----------------
306 -- | Metric | --
307 ----------------
308
309
310 -- | Metric constructors
311 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
312
313
314 ----------------
315 -- | Tagger | --
316 ----------------
317
318
319 -- | Tagger constructors
320 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
321
322
323 --------------
324 -- | Sort | --
325 --------------
326
327
328 -- | Sort constructors
329 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
330 data Order = Asc | Desc deriving (Generic, Show, Read)
331
332
333 --------------------
334 -- | PhyloQuery | --
335 --------------------
336
337
338 -- | A Phyloquery describes a phylomemic reconstruction
339 data PhyloQueryBuild = PhyloQueryBuild
340 { _q_phyloTitle :: Text
341 , _q_phyloDesc :: Text
342
343 -- Grain and Steps for the PhyloPeriods
344 , _q_periodGrain :: Int
345 , _q_periodSteps :: Int
346
347 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
348 , _q_contextualUnit :: Cluster
349 , _q_contextualUnitMetrics :: [Metric]
350 , _q_contextualUnitFilters :: [Filter]
351
352 -- Inter-temporal matching method of the Phylo
353 , _q_interTemporalMatching :: Proximity
354 , _q_interTemporalMatchingFrame :: Int
355 , _q_interTemporalMatchingFrameTh :: Double
356
357 , _q_reBranchThr :: Double
358 , _q_reBranchNth :: Int
359
360 -- Last level of reconstruction
361 , _q_nthLevel :: Level
362 -- Clustering method used from level 1 to nthLevel
363 , _q_nthCluster :: Cluster
364 } deriving (Generic, Show, Eq)
365
366 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
367 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
368 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
369
370 -------------------
371 -- | PhyloView | --
372 -------------------
373
374
375 -- | A PhyloView is the output type of a Phylo
376 data PhyloView = PhyloView
377 { _pv_param :: PhyloParam
378 , _pv_title :: Text
379 , _pv_description :: Text
380 , _pv_filiation :: Filiation
381 , _pv_level :: Level
382 , _pv_periods :: [PhyloPeriodId]
383 , _pv_metrics :: Map Text [Double]
384 , _pv_branches :: [PhyloBranch]
385 , _pv_nodes :: [PhyloNode]
386 , _pv_edges :: [PhyloEdge]
387 } deriving (Generic, Show)
388
389 -- | A phyloview is made of PhyloBranches, edges and nodes
390 data PhyloBranch = PhyloBranch
391 { _pb_id :: PhyloBranchId
392 , _pb_peak :: Text
393 , _pb_metrics :: Map Text [Double]
394 } deriving (Generic, Show)
395
396 data PhyloEdge = PhyloEdge
397 { _pe_source :: PhyloGroupId
398 , _pe_target :: PhyloGroupId
399 , _pe_type :: EdgeType
400 , _pe_weight :: Weight
401 } deriving (Generic, Show)
402
403 data PhyloNode = PhyloNode
404 { _pn_id :: PhyloGroupId
405 , _pn_bid :: Maybe PhyloBranchId
406 , _pn_label :: Text
407 , _pn_idx :: [Int]
408 , _pn_ngrams :: Maybe [Ngrams]
409 , _pn_metrics :: Map Text [Double]
410 , _pn_parents :: Maybe [PhyloGroupId]
411 , _pn_childs :: [PhyloNode]
412 } deriving (Generic, Show)
413
414 ------------------------
415 -- | PhyloQueryView | --
416 ------------------------
417
418
419 data ExportMode = Json | Dot | Svg
420 deriving (Generic, Show, Read)
421 data DisplayMode = Flat | Nested
422 deriving (Generic, Show, Read)
423
424 -- | A PhyloQueryView describes a Phylo as an output view
425 data PhyloQueryView = PhyloQueryView
426 { _qv_lvl :: Level
427
428 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
429 , _qv_filiation :: Filiation
430
431 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
432 , _qv_levelChilds :: Bool
433 , _qv_levelChildsDepth :: Level
434
435 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
436 -- Firstly the metrics, then the filters and the taggers
437 , _qv_metrics :: [Metric]
438 , _qv_filters :: [Filter]
439 , _qv_taggers :: [Tagger]
440
441 -- An asc or desc sort to apply to the PhyloGraph
442 , _qv_sort :: Maybe (Sort,Order)
443
444 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
445 , _qv_export :: ExportMode
446 , _qv_display :: DisplayMode
447 , _qv_verbose :: Bool
448 }
449
450
451 ----------------
452 -- | Lenses | --
453 ----------------
454
455
456 makeLenses ''PhyloParam
457 makeLenses ''Software
458 --
459 makeLenses ''Phylo
460 makeLenses ''PhyloFoundations
461 makeLenses ''PhyloGroup
462 makeLenses ''PhyloLevel
463 makeLenses ''PhyloPeriod
464 makeLenses ''PhyloFis
465 --
466 makeLenses ''Proximity
467 makeLenses ''Cluster
468 makeLenses ''Filter
469 --
470 makeLenses ''PhyloQueryBuild
471 makeLenses ''PhyloQueryView
472 --
473 makeLenses ''PhyloView
474 makeLenses ''PhyloBranch
475 makeLenses ''PhyloNode
476 makeLenses ''PhyloEdge
477
478
479 ------------------------
480 -- | JSON instances | --
481 ------------------------
482
483
484 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
485 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
486 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
487 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
488 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
489 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
490 --
491 $(deriveJSON (unPrefix "_software_" ) ''Software )
492 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
493 --
494 $(deriveJSON defaultOptions ''Filter )
495 $(deriveJSON defaultOptions ''Metric )
496 $(deriveJSON defaultOptions ''Cluster )
497 $(deriveJSON defaultOptions ''Proximity )
498 --
499 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
500 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
501 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
502 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
503 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
504 --
505 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
506 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
507 --
508 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
509 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
510 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
511 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
512 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
513
514 $(deriveJSON defaultOptions ''Filiation )
515 $(deriveJSON defaultOptions ''EdgeType )
516
517
518 ----------------------------
519 -- | TODO XML instances | --
520 ----------------------------
521