]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
fix temporalMatching
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 --------------------
48 -- | PhyloParam | --
49 --------------------
50
51
52 -- | Global parameters of a Phylo
53 data PhyloParam =
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
58
59
60 -- | Software parameters
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
65
66
67 ---------------
68 -- | Phylo | --
69 ---------------
70
71
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
76 data Phylo =
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
82 , _phylo_param :: PhyloParam
83 }
84 deriving (Generic, Show, Eq)
85
86
87 -- | The foundations of a phylomemy created from a given TermList
88 data PhyloFoundations =
89 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
90 , _phylo_foundationsTermsList :: TermList
91 } deriving (Generic, Show, Eq)
92
93
94 -- | Date : a simple Integer
95 type Date = Int
96
97 -- | UTCTime in seconds since UNIX epoch
98 -- type Start = POSIXTime
99 -- type End = POSIXTime
100 type Start = Date
101 type End = Date
102
103
104 ---------------------
105 -- | PhyloPeriod | --
106 ---------------------
107
108
109 -- | PhyloStep : steps of phylomemy on temporal axis
110 -- Period: tuple (start date, end date) of the step of the phylomemy
111 -- Levels: levels of granularity
112 data PhyloPeriod =
113 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
114 , _phylo_periodLevels :: [PhyloLevel]
115 }
116 deriving (Generic, Show, Eq)
117
118
119 --------------------
120 -- | PhyloLevel | --
121 --------------------
122
123
124 -- | PhyloLevel : levels of phylomemy on level axis
125 -- Levels description:
126 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
127 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
128 -- Level 1: First level of clustering
129 -- Level N: Nth level of clustering
130 data PhyloLevel =
131 PhyloLevel { _phylo_levelId :: PhyloLevelId
132 , _phylo_levelGroups :: [PhyloGroup]
133 }
134 deriving (Generic, Show, Eq)
135
136
137 --------------------
138 -- | PhyloGroup | --
139 --------------------
140
141
142 -- | PhyloGroup : group of ngrams at each level and step
143 -- Label : maybe has a label as text
144 -- Ngrams: set of terms that build the group
145 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
146 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
147 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
148 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
149 data PhyloGroup =
150 PhyloGroup { _phylo_groupId :: PhyloGroupId
151 , _phylo_groupLabel :: Text
152 , _phylo_groupNgrams :: [Int]
153 , _phylo_groupMeta :: Map Text Double
154 , _phylo_groupBranchId :: Maybe PhyloBranchId
155 , _phylo_groupCooc :: Map (Int,Int) Double
156
157 , _phylo_groupPeriodParents :: [Pointer]
158 , _phylo_groupPeriodChilds :: [Pointer]
159
160 , _phylo_groupLevelParents :: [Pointer]
161 , _phylo_groupLevelChilds :: [Pointer]
162 }
163 deriving (Generic, Show, Eq, Ord)
164
165
166 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
167 type Level = Int
168 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
169 type Index = Int
170
171
172 type PhyloPeriodId = (Start, End)
173 type PhyloLevelId = (PhyloPeriodId, Level)
174 type PhyloGroupId = (PhyloLevelId, Index)
175 type PhyloBranchId = (Level, Index)
176
177
178 -- | Weight : A generic mesure that can be associated with an Id
179 type Weight = Double
180 -- | Pointer : A weighted linked with a given PhyloGroup
181 type Pointer = (PhyloGroupId, Weight)
182 -- | Ngrams : a contiguous sequence of n terms
183 type Ngrams = Text
184
185
186 --------------------
187 -- | Aggregates | --
188 --------------------
189
190
191 -- | Document : a piece of Text linked to a Date
192 data Document = Document
193 { date :: Date
194 , text :: [Ngrams]
195 } deriving (Show,Generic)
196
197 -- | Clique : Set of ngrams cooccurring in the same Document
198 type Clique = Set Ngrams
199 -- | Support : Number of Documents where a Clique occurs
200 type Support = Int
201 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
202 data PhyloFis = PhyloFis
203 { _phyloFis_clique :: Clique
204 , _phyloFis_support :: Support
205 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
206 } deriving (Show)
207
208 -- | A list of clustered PhyloGroup
209 type PhyloCluster = [PhyloGroup]
210
211
212 -- | A PhyloGroup in a Graph
213 type GroupNode = PhyloGroup
214 -- | A weighted links between two PhyloGroups in a Graph
215 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
216 -- | The association as a Graph between a list of Nodes and a list of Edges
217 type GroupGraph = ([GroupNode],[GroupEdge])
218
219
220 ---------------
221 -- | Error | --
222 ---------------
223
224
225 data PhyloError = LevelDoesNotExist
226 | LevelUnassigned
227 deriving (Show)
228
229
230 -----------------
231 -- | Cluster | --
232 -----------------
233
234
235 -- | Cluster constructors
236 data Cluster = Fis FisParams
237 | RelatedComponents RCParams
238 | Louvain LouvainParams
239 deriving (Generic, Show, Eq, Read)
240
241 -- | Parameters for Fis clustering
242 data FisParams = FisParams
243 { _fis_keepMinorFis :: Bool
244 , _fis_minSupport :: Support
245 , _fis_minSize :: Int
246 } deriving (Generic, Show, Eq, Read)
247
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
251
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
255
256
257 -------------------
258 -- | Proximity | --
259 -------------------
260
261
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
265 | Filiation
266 deriving (Generic, Show, Eq, Read)
267
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq, Read)
273
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
277
278
279 ----------------
280 -- | Filter | --
281 ----------------
282
283
284 -- | Filter constructors
285 data Filter = LonelyBranch LBParams
286 | SizeBranch SBParams
287 deriving (Generic, Show, Eq)
288
289 -- | Parameters for LonelyBranch filter
290 data LBParams = LBParams
291 { _lb_periodsInf :: Int
292 , _lb_periodsSup :: Int
293 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
294
295 -- | Parameters for SizeBranch filter
296 data SBParams = SBParams
297 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
298
299
300 ----------------
301 -- | Metric | --
302 ----------------
303
304
305 -- | Metric constructors
306 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
307
308
309 ----------------
310 -- | Tagger | --
311 ----------------
312
313
314 -- | Tagger constructors
315 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
316
317
318 --------------
319 -- | Sort | --
320 --------------
321
322
323 -- | Sort constructors
324 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
325 data Order = Asc | Desc deriving (Generic, Show, Read)
326
327
328 --------------------
329 -- | PhyloQuery | --
330 --------------------
331
332
333 -- | A Phyloquery describes a phylomemic reconstruction
334 data PhyloQueryBuild = PhyloQueryBuild
335 { _q_phyloTitle :: Text
336 , _q_phyloDesc :: Text
337
338 -- Grain and Steps for the PhyloPeriods
339 , _q_periodGrain :: Int
340 , _q_periodSteps :: Int
341
342 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
343 , _q_contextualUnit :: Cluster
344 , _q_contextualUnitMetrics :: [Metric]
345 , _q_contextualUnitFilters :: [Filter]
346
347 -- Inter-temporal matching method of the Phylo
348 , _q_interTemporalMatching :: Proximity
349
350 -- Last level of reconstruction
351 , _q_nthLevel :: Level
352 -- Clustering method used from level 1 to nthLevel
353 , _q_nthCluster :: Cluster
354 } deriving (Generic, Show, Eq)
355
356 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
357 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
358 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
359
360 -------------------
361 -- | PhyloView | --
362 -------------------
363
364
365 -- | A PhyloView is the output type of a Phylo
366 data PhyloView = PhyloView
367 { _pv_param :: PhyloParam
368 , _pv_title :: Text
369 , _pv_description :: Text
370 , _pv_filiation :: Filiation
371 , _pv_level :: Level
372 , _pv_periods :: [PhyloPeriodId]
373 , _pv_metrics :: Map Text [Double]
374 , _pv_branches :: [PhyloBranch]
375 , _pv_nodes :: [PhyloNode]
376 , _pv_edges :: [PhyloEdge]
377 } deriving (Generic, Show)
378
379 -- | A phyloview is made of PhyloBranches, edges and nodes
380 data PhyloBranch = PhyloBranch
381 { _pb_id :: PhyloBranchId
382 , _pb_peak :: Text
383 , _pb_metrics :: Map Text [Double]
384 } deriving (Generic, Show)
385
386 data PhyloEdge = PhyloEdge
387 { _pe_source :: PhyloGroupId
388 , _pe_target :: PhyloGroupId
389 , _pe_type :: EdgeType
390 , _pe_weight :: Weight
391 } deriving (Generic, Show)
392
393 data PhyloNode = PhyloNode
394 { _pn_id :: PhyloGroupId
395 , _pn_bid :: Maybe PhyloBranchId
396 , _pn_label :: Text
397 , _pn_idx :: [Int]
398 , _pn_ngrams :: Maybe [Ngrams]
399 , _pn_metrics :: Map Text [Double]
400 , _pn_parents :: Maybe [PhyloGroupId]
401 , _pn_childs :: [PhyloNode]
402 } deriving (Generic, Show)
403
404 ------------------------
405 -- | PhyloQueryView | --
406 ------------------------
407
408
409 data ExportMode = Json | Dot | Svg
410 deriving (Generic, Show, Read)
411 data DisplayMode = Flat | Nested
412 deriving (Generic, Show, Read)
413
414 -- | A PhyloQueryView describes a Phylo as an output view
415 data PhyloQueryView = PhyloQueryView
416 { _qv_lvl :: Level
417
418 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
419 , _qv_filiation :: Filiation
420
421 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
422 , _qv_levelChilds :: Bool
423 , _qv_levelChildsDepth :: Level
424
425 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
426 -- Firstly the metrics, then the filters and the taggers
427 , _qv_metrics :: [Metric]
428 , _qv_filters :: [Filter]
429 , _qv_taggers :: [Tagger]
430
431 -- An asc or desc sort to apply to the PhyloGraph
432 , _qv_sort :: Maybe (Sort,Order)
433
434 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
435 , _qv_export :: ExportMode
436 , _qv_display :: DisplayMode
437 , _qv_verbose :: Bool
438 }
439
440
441 ----------------
442 -- | Lenses | --
443 ----------------
444
445
446 makeLenses ''PhyloParam
447 makeLenses ''Software
448 --
449 makeLenses ''Phylo
450 makeLenses ''PhyloFoundations
451 makeLenses ''PhyloGroup
452 makeLenses ''PhyloLevel
453 makeLenses ''PhyloPeriod
454 makeLenses ''PhyloFis
455 --
456 makeLenses ''Proximity
457 makeLenses ''Cluster
458 makeLenses ''Filter
459 --
460 makeLenses ''PhyloQueryBuild
461 makeLenses ''PhyloQueryView
462 --
463 makeLenses ''PhyloView
464 makeLenses ''PhyloBranch
465 makeLenses ''PhyloNode
466 makeLenses ''PhyloEdge
467
468
469 ------------------------
470 -- | JSON instances | --
471 ------------------------
472
473
474 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
475 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
476 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
477 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
478 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
479 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
480 --
481 $(deriveJSON (unPrefix "_software_" ) ''Software )
482 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
483 --
484 $(deriveJSON defaultOptions ''Filter )
485 $(deriveJSON defaultOptions ''Metric )
486 $(deriveJSON defaultOptions ''Cluster )
487 $(deriveJSON defaultOptions ''Proximity )
488 --
489 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
490 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
491 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
492 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
493 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
494 --
495 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
496 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
497 --
498 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
499 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
500 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
501 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
502 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
503
504 $(deriveJSON defaultOptions ''Filiation )
505 $(deriveJSON defaultOptions ''EdgeType )
506
507
508 ----------------------------
509 -- | TODO XML instances | --
510 ----------------------------
511