]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
[REFACT] before scoring new ngrams lists.
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 --------------------
48 -- | PhyloParam | --
49 --------------------
50
51
52 -- | Global parameters of a Phylo
53 data PhyloParam =
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
58
59
60 -- | Software parameters
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
65
66
67 ---------------
68 -- | Phylo | --
69 ---------------
70
71
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
76 data Phylo =
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
81 }
82 deriving (Generic, Show, Eq)
83
84
85 -- | The foundations of a phylomemy created from a given TermList
86 data PhyloFoundations =
87 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
88 , _phylo_foundationsTermsList :: TermList
89 } deriving (Generic, Show, Eq)
90
91
92 -- | Date : a simple Integer
93 type Date = Int
94
95 -- | UTCTime in seconds since UNIX epoch
96 -- type Start = POSIXTime
97 -- type End = POSIXTime
98 type Start = Date
99 type End = Date
100
101
102 ---------------------
103 -- | PhyloPeriod | --
104 ---------------------
105
106
107 -- | PhyloStep : steps of phylomemy on temporal axis
108 -- Period: tuple (start date, end date) of the step of the phylomemy
109 -- Levels: levels of granularity
110 data PhyloPeriod =
111 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
112 , _phylo_periodLevels :: [PhyloLevel]
113 }
114 deriving (Generic, Show, Eq)
115
116
117 --------------------
118 -- | PhyloLevel | --
119 --------------------
120
121
122 -- | PhyloLevel : levels of phylomemy on level axis
123 -- Levels description:
124 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
125 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
126 -- Level 1: First level of clustering
127 -- Level N: Nth level of clustering
128 data PhyloLevel =
129 PhyloLevel { _phylo_levelId :: PhyloLevelId
130 , _phylo_levelGroups :: [PhyloGroup]
131 }
132 deriving (Generic, Show, Eq)
133
134
135 --------------------
136 -- | PhyloGroup | --
137 --------------------
138
139
140 -- | PhyloGroup : group of ngrams at each level and step
141 -- Label : maybe has a label as text
142 -- Ngrams: set of terms that build the group
143 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
144 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
145 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
146 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
147 data PhyloGroup =
148 PhyloGroup { _phylo_groupId :: PhyloGroupId
149 , _phylo_groupLabel :: Text
150 , _phylo_groupNgrams :: [Int]
151 , _phylo_groupMeta :: Map Text Double
152 , _phylo_groupBranchId :: Maybe PhyloBranchId
153
154 , _phylo_groupPeriodParents :: [Pointer]
155 , _phylo_groupPeriodChilds :: [Pointer]
156
157 , _phylo_groupLevelParents :: [Pointer]
158 , _phylo_groupLevelChilds :: [Pointer]
159 }
160 deriving (Generic, Show, Eq, Ord)
161
162
163 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
164 type Level = Int
165 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
166 type Index = Int
167
168
169 type PhyloPeriodId = (Start, End)
170 type PhyloLevelId = (PhyloPeriodId, Level)
171 type PhyloGroupId = (PhyloLevelId, Index)
172 type PhyloBranchId = (Level, Index)
173
174
175 -- | Weight : A generic mesure that can be associated with an Id
176 type Weight = Double
177 -- | Pointer : A weighted linked with a given PhyloGroup
178 type Pointer = (PhyloGroupId, Weight)
179 -- | Ngrams : a contiguous sequence of n terms
180 type Ngrams = Text
181
182
183 --------------------
184 -- | Aggregates | --
185 --------------------
186
187
188 -- | Document : a piece of Text linked to a Date
189 data Document = Document
190 { date :: Date
191 , text :: [Ngrams]
192 } deriving (Show,Generic)
193
194 -- | Clique : Set of ngrams cooccurring in the same Document
195 type Clique = Set Ngrams
196 -- | Support : Number of Documents where a Clique occurs
197 type Support = Int
198 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
199 data PhyloFis = PhyloFis
200 { _phyloFis_clique :: Clique
201 , _phyloFis_support :: Support
202 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
203 } deriving (Show)
204
205 -- | A list of clustered PhyloGroup
206 type PhyloCluster = [PhyloGroup]
207
208
209 -- | A PhyloGroup in a Graph
210 type GroupNode = PhyloGroup
211 -- | A weighted links between two PhyloGroups in a Graph
212 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
213 -- | The association as a Graph between a list of Nodes and a list of Edges
214 type GroupGraph = ([GroupNode],[GroupEdge])
215
216
217 ---------------
218 -- | Error | --
219 ---------------
220
221
222 data PhyloError = LevelDoesNotExist
223 | LevelUnassigned
224 deriving (Show)
225
226
227 -----------------
228 -- | Cluster | --
229 -----------------
230
231
232 -- | Cluster constructors
233 data Cluster = Fis FisParams
234 | RelatedComponents RCParams
235 | Louvain LouvainParams
236 deriving (Generic, Show, Eq, Read)
237
238 -- | Parameters for Fis clustering
239 data FisParams = FisParams
240 { _fis_keepMinorFis :: Bool
241 , _fis_minSupport :: Support
242 , _fis_minSize :: Int
243 } deriving (Generic, Show, Eq, Read)
244
245 -- | Parameters for RelatedComponents clustering
246 data RCParams = RCParams
247 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
248
249 -- | Parameters for Louvain clustering
250 data LouvainParams = LouvainParams
251 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
252
253
254 -------------------
255 -- | Proximity | --
256 -------------------
257
258
259 -- | Proximity constructors
260 data Proximity = WeightedLogJaccard WLJParams
261 | Hamming HammingParams
262 | Filiation
263 deriving (Generic, Show, Eq, Read)
264
265 -- | Parameters for WeightedLogJaccard proximity
266 data WLJParams = WLJParams
267 { _wlj_threshold :: Double
268 , _wlj_sensibility :: Double
269 } deriving (Generic, Show, Eq, Read)
270
271 -- | Parameters for Hamming proximity
272 data HammingParams = HammingParams
273 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
274
275
276 ----------------
277 -- | Filter | --
278 ----------------
279
280
281 -- | Filter constructors
282 data Filter = LonelyBranch LBParams
283 | SizeBranch SBParams
284 deriving (Generic, Show, Eq)
285
286 -- | Parameters for LonelyBranch filter
287 data LBParams = LBParams
288 { _lb_periodsInf :: Int
289 , _lb_periodsSup :: Int
290 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
291
292 -- | Parameters for SizeBranch filter
293 data SBParams = SBParams
294 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
295
296
297 ----------------
298 -- | Metric | --
299 ----------------
300
301
302 -- | Metric constructors
303 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
304
305
306 ----------------
307 -- | Tagger | --
308 ----------------
309
310
311 -- | Tagger constructors
312 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
313
314
315 --------------
316 -- | Sort | --
317 --------------
318
319
320 -- | Sort constructors
321 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
322 data Order = Asc | Desc deriving (Generic, Show, Read)
323
324
325 --------------------
326 -- | PhyloQuery | --
327 --------------------
328
329
330 -- | A Phyloquery describes a phylomemic reconstruction
331 data PhyloQueryBuild = PhyloQueryBuild
332 { _q_phyloTitle :: Text
333 , _q_phyloDesc :: Text
334
335 -- Grain and Steps for the PhyloPeriods
336 , _q_periodGrain :: Int
337 , _q_periodSteps :: Int
338
339 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
340 , _q_contextualUnit :: Cluster
341 , _q_contextualUnitMetrics :: [Metric]
342 , _q_contextualUnitFilters :: [Filter]
343
344 -- Inter-temporal matching method of the Phylo
345 , _q_interTemporalMatching :: Proximity
346
347 -- Last level of reconstruction
348 , _q_nthLevel :: Level
349 -- Clustering method used from level 1 to nthLevel
350 , _q_nthCluster :: Cluster
351 } deriving (Generic, Show, Eq)
352
353 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
354 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
355 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
356
357 -------------------
358 -- | PhyloView | --
359 -------------------
360
361
362 -- | A PhyloView is the output type of a Phylo
363 data PhyloView = PhyloView
364 { _pv_param :: PhyloParam
365 , _pv_title :: Text
366 , _pv_description :: Text
367 , _pv_filiation :: Filiation
368 , _pv_level :: Level
369 , _pv_periods :: [PhyloPeriodId]
370 , _pv_metrics :: Map Text [Double]
371 , _pv_branches :: [PhyloBranch]
372 , _pv_nodes :: [PhyloNode]
373 , _pv_edges :: [PhyloEdge]
374 } deriving (Generic, Show)
375
376 -- | A phyloview is made of PhyloBranches, edges and nodes
377 data PhyloBranch = PhyloBranch
378 { _pb_id :: PhyloBranchId
379 , _pb_peak :: Text
380 , _pb_metrics :: Map Text [Double]
381 } deriving (Generic, Show)
382
383 data PhyloEdge = PhyloEdge
384 { _pe_source :: PhyloGroupId
385 , _pe_target :: PhyloGroupId
386 , _pe_type :: EdgeType
387 , _pe_weight :: Weight
388 } deriving (Generic, Show)
389
390 data PhyloNode = PhyloNode
391 { _pn_id :: PhyloGroupId
392 , _pn_bid :: Maybe PhyloBranchId
393 , _pn_label :: Text
394 , _pn_idx :: [Int]
395 , _pn_ngrams :: Maybe [Ngrams]
396 , _pn_metrics :: Map Text [Double]
397 , _pn_parents :: Maybe [PhyloGroupId]
398 , _pn_childs :: [PhyloNode]
399 } deriving (Generic, Show)
400
401 ------------------------
402 -- | PhyloQueryView | --
403 ------------------------
404
405
406 data ExportMode = Json | Dot | Svg
407 deriving (Generic, Show, Read)
408 data DisplayMode = Flat | Nested
409 deriving (Generic, Show, Read)
410
411 -- | A PhyloQueryView describes a Phylo as an output view
412 data PhyloQueryView = PhyloQueryView
413 { _qv_lvl :: Level
414
415 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
416 , _qv_filiation :: Filiation
417
418 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
419 , _qv_levelChilds :: Bool
420 , _qv_levelChildsDepth :: Level
421
422 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
423 -- Firstly the metrics, then the filters and the taggers
424 , _qv_metrics :: [Metric]
425 , _qv_filters :: [Filter]
426 , _qv_taggers :: [Tagger]
427
428 -- An asc or desc sort to apply to the PhyloGraph
429 , _qv_sort :: Maybe (Sort,Order)
430
431 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
432 , _qv_export :: ExportMode
433 , _qv_display :: DisplayMode
434 , _qv_verbose :: Bool
435 }
436
437
438 ----------------
439 -- | Lenses | --
440 ----------------
441
442
443 makeLenses ''PhyloParam
444 makeLenses ''Software
445 --
446 makeLenses ''Phylo
447 makeLenses ''PhyloFoundations
448 makeLenses ''PhyloGroup
449 makeLenses ''PhyloLevel
450 makeLenses ''PhyloPeriod
451 makeLenses ''PhyloFis
452 --
453 makeLenses ''Proximity
454 makeLenses ''Cluster
455 makeLenses ''Filter
456 --
457 makeLenses ''PhyloQueryBuild
458 makeLenses ''PhyloQueryView
459 --
460 makeLenses ''PhyloView
461 makeLenses ''PhyloBranch
462 makeLenses ''PhyloNode
463 makeLenses ''PhyloEdge
464
465
466 ------------------------
467 -- | JSON instances | --
468 ------------------------
469
470
471 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
472 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
473 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
474 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
475 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
476 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
477 --
478 $(deriveJSON (unPrefix "_software_" ) ''Software )
479 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
480 --
481 $(deriveJSON defaultOptions ''Filter )
482 $(deriveJSON defaultOptions ''Metric )
483 $(deriveJSON defaultOptions ''Cluster )
484 $(deriveJSON defaultOptions ''Proximity )
485 --
486 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
487 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
488 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
489 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
490 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
491 --
492 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
493 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
494 --
495 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
496 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
497 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
498 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
499 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
500
501 $(deriveJSON defaultOptions ''Filiation )
502 $(deriveJSON defaultOptions ''EdgeType )
503
504
505 ----------------------------
506 -- | TODO XML instances | --
507 ----------------------------
508