]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Merge branch 'dev-phylo' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantex...
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
{-|
Module      : Gargantext.Viz.Phylo
Description : Phylomemy definitions and types.
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Specifications of Phylomemy export format.

Phylomemy can be described as a Temporal Graph with different scales of
granularity of groups of ngrams (terms and multi-terms).

The main type is Phylo, which is a synonym of Phylomemy (the only difference
is the number of chars).

References:
Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
in science evolution — the rise and fall of scientific fields. PloS
one 8, e54847.

-}
24
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 import Control.DeepSeq
48
49 --------------------
50 -- | PhyloParam | --
51 --------------------
52
53
-- | Global parameters of a Phylo: a version tag, a 'Software' description
--   and the 'PhyloQueryBuild' attached to the Phylo.
data PhyloParam = PhyloParam
  { _phyloParam_version  :: Text             -- ^ version, kept as 'Text' (Double ?)
  , _phyloParam_software :: Software
  , _phyloParam_query    :: PhyloQueryBuild
  }
  deriving (Generic, Show, Eq)
60
61
-- | Software parameters: a name and a version, both stored as 'Text'
data Software = Software
  { _software_name    :: Text
  , _software_version :: Text
  }
  deriving (Generic, Show, Eq)
67
68
69 ---------------
70 -- | Phylo | --
71 ---------------
72
73
-- | Phylo datatype of a phylomemy
--   Duration    : time segment ('Start', 'End') of the whole Phylo
--   Foundations : the Ngrams contained in a Phylo (see 'PhyloFoundations')
--   Periods     : list of all the periods of a Phylo
--   DocsByYears : a 'Double' per 'Date'
--                 (NOTE(review): presumably a document count per year -- confirm)
--   Cooc        : for each 'Date', a map from a pair of 'Int' to a 'Double'
--   Fis         : the 'PhyloFis' attached to each (Date,Date) pair
--   Param       : the global 'PhyloParam' of the Phylo
data Phylo = Phylo
  { _phylo_duration    :: (Start, End)
  , _phylo_foundations :: PhyloFoundations
  , _phylo_periods     :: [PhyloPeriod]
  , _phylo_docsByYears :: Map Date Double
  , _phylo_cooc        :: Map Date (Map (Int,Int) Double)
  , _phylo_fis         :: Map (Date,Date) [PhyloFis]
  , _phylo_param       :: PhyloParam
  }
  deriving (Generic, Show, Eq)
88
89
-- | The foundations of a phylomemy created from a given TermList:
--   a 'Vector' of root 'Ngrams' together with the 'TermList' itself.
data PhyloFoundations = PhyloFoundations
  { _phylo_foundationsRoots     :: Vector Ngrams
  , _phylo_foundationsTermsList :: TermList
  }
  deriving (Generic, Show, Eq)
95
96
-- | Date : a point in time encoded as a plain 'Int'
type Date  = Int

-- | Start / End : the two bounds of a time span, both plain 'Date's.
--   A POSIXTime encoding (UTCTime in seconds since UNIX epoch) is kept
--   below, commented out, for reference:
-- type Start = POSIXTime
-- type End   = POSIXTime
type Start = Date
type End   = Date
105
106
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
110
111
-- | PhyloPeriod : a step of the phylomemy on the temporal axis
--   Id     : tuple (start date, end date) of the step (see 'PhyloPeriodId')
--   Levels : the levels of granularity available for this step
data PhyloPeriod = PhyloPeriod
  { _phylo_periodId     :: PhyloPeriodId
  , _phylo_periodLevels :: [PhyloLevel]
  }
  deriving (Generic, Show, Eq)
120
121
122 --------------------
123 -- | PhyloLevel | --
124 --------------------
125
126
-- | PhyloLevel : a level of the phylomemy on the level (granularity) axis,
--   holding its identifier and the groups that live at that level.
--   Levels description:
--     Level -1 : Ngram equals itself (by identity) == _phylo_Ngrams
--     Level  0 : Group of synonyms (by stems + by qualitative expert meaning)
--     Level  1 : First level of clustering
--     Level  N : Nth level of clustering
data PhyloLevel = PhyloLevel
  { _phylo_levelId     :: PhyloLevelId
  , _phylo_levelGroups :: [PhyloGroup]
  }
  deriving (Generic, Show, Eq)
138
139
140 --------------------
141 -- | PhyloGroup | --
142 --------------------
143
144
-- | PhyloGroup : group of ngrams at each level and step
--   Label      : the label of the group, as 'Text'
--   Ngrams     : the terms that build the group, stored as a list of 'Int'
--                (NOTE(review): presumably indexes into the foundations roots -- confirm)
--   NgramsMeta : named lists of measures
--   Meta       : named measures (support, etc.) that depict some qualitative
--                aspects of a phylo
--   BranchId   : the branch the group belongs to, if any
--   Cooc       : a 'Double' for each pair of 'Int'
--   Period Parents|Childs : weighted links to Parents|Childs (Temporal Period axis)
--   Level  Parents|Childs : weighted links to Parents|Childs (Level Granularity axis)
--   Pointers are directed links from Self to any PhyloGroup (/= Self ?)
data PhyloGroup = PhyloGroup
  { _phylo_groupId            :: PhyloGroupId
  , _phylo_groupLabel         :: Text
  , _phylo_groupNgrams        :: [Int]
  , _phylo_groupNgramsMeta    :: Map Text [Double]
  , _phylo_groupMeta          :: Map Text Double
  , _phylo_groupBranchId      :: Maybe PhyloBranchId
  , _phylo_groupCooc          :: Map (Int,Int) Double

    -- links along the temporal (period) axis
  , _phylo_groupPeriodParents :: [Pointer]
  , _phylo_groupPeriodChilds  :: [Pointer]

    -- links along the granularity (level) axis
  , _phylo_groupLevelParents  :: [Pointer]
  , _phylo_groupLevelChilds   :: [Pointer]
  }
  deriving (Generic, NFData, Show, Eq, Ord)
168
169 -- instance NFData PhyloGroup
170
171
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
type Level = Int

-- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
type Index = Int


-- | Identifiers are nested tuples: a period is identified by its bounds,
--   a level by its period and its 'Level', a group by its level and its
--   'Index'. A branch is identified by a 'Level' and an 'Index'.
type PhyloPeriodId = (Start, End)
type PhyloLevelId  = (PhyloPeriodId, Level)
type PhyloGroupId  = (PhyloLevelId, Index)
type PhyloBranchId = (Level, Index)
182
183
-- | Weight : A generic measure that can be associated with an Id
type Weight  = Double

-- | Pointer : A weighted link to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)

-- | Ngrams : a contiguous sequence of n terms
type Ngrams  = Text
190
191
192 --------------------
193 -- | Aggregates | --
194 --------------------
195
196
-- | Document : a piece of Text linked to a Date; the text is carried as
--   a list of 'Ngrams'.
data Document = Document
  { date :: Date
  , text :: [Ngrams]
  }
  deriving (Show,Generic)
202
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique  = Set Ngrams

-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a
--   Support), tagged with the (Date,Date) period it belongs to
data PhyloFis = PhyloFis
  { _phyloFis_clique  :: Clique
  , _phyloFis_support :: Support
  , _phyloFis_period  :: (Date,Date)
  }
  deriving (Generic,Show,Eq)
213
-- | PhyloCluster : a list of PhyloGroups that have been clustered together
type PhyloCluster = [PhyloGroup]
216
217
-- | GroupNode : a PhyloGroup seen as a node of a Graph
type GroupNode  = PhyloGroup

-- | GroupEdge : a weighted link between two PhyloGroups in a Graph
type GroupEdge  = ((PhyloGroup,PhyloGroup),Weight)

-- | GroupGraph : a Graph given as a list of Nodes paired with a list of Edges
type GroupGraph = ([GroupNode],[GroupEdge])
224
225
226 ---------------
227 -- | Error | --
228 ---------------
229
230
-- | Errors related to the levels of a Phylo
data PhyloError
  = LevelDoesNotExist
  | LevelUnassigned
  deriving (Show)
234
235
236 -----------------
237 -- | Cluster | --
238 -----------------
239
240
-- | Cluster constructors: one per clustering method, each carrying the
--   parameters of that method
data Cluster
  = Fis               FisParams
  | RelatedComponents RCParams
  | Louvain           LouvainParams
  deriving (Generic, Show, Eq, Read)
246
-- | Parameters for Fis clustering: a flag about minor Fis, a minimal
--   'Support' and a minimal size
data FisParams = FisParams
  { _fis_keepMinorFis :: Bool
  , _fis_minSupport   :: Support
  , _fis_minSize      :: Int
  }
  deriving (Generic, Show, Eq, Read)
253
-- | Parameters for RelatedComponents clustering: the 'Proximity' to use
data RCParams = RCParams
  { _rc_proximity :: Proximity
  }
  deriving (Generic, Show, Eq, Read)
257
-- | Parameters for Louvain clustering: the 'Proximity' to use
data LouvainParams = LouvainParams
  { _louvain_proximity :: Proximity
  }
  deriving (Generic, Show, Eq, Read)
261
262
263 -------------------
264 -- | Proximity | --
265 -------------------
266
267
-- | Proximity constructors: WeightedLogJaccard and Hamming carry their own
--   parameters, Filiation carries none
data Proximity
  = WeightedLogJaccard WLJParams
  | Hamming            HammingParams
  | Filiation
  deriving (Generic, Show, Eq, Read)
273
-- | Parameters for WeightedLogJaccard proximity: a threshold and a
--   sensibility, both 'Double'
data WLJParams = WLJParams
  { _wlj_threshold   :: Double
  , _wlj_sensibility :: Double
  }
  deriving (Generic, Show, Eq, Read)
279
-- | Parameters for Hamming proximity: a single 'Double' threshold
data HammingParams = HammingParams
  { _hamming_threshold :: Double
  }
  deriving (Generic, Show, Eq, Read)
283
284
285 ----------------
286 -- | Filter | --
287 ----------------
288
289
-- | Filter constructors: one per kind of branch filter, each carrying its
--   own parameters
data Filter
  = LonelyBranch LBParams
  | SizeBranch   SBParams
  deriving (Generic, Show, Eq)
294
-- | Parameters for LonelyBranch filter: two period bounds (inf, sup) and a
--   minimal number of nodes, all 'Int'
data LBParams = LBParams
  { _lb_periodsInf :: Int
  , _lb_periodsSup :: Int
  , _lb_minNodes   :: Int
  }
  deriving (Generic, Show, Eq)
300
-- | Parameters for SizeBranch filter: a minimal size, as 'Int'
data SBParams = SBParams
  { _sb_minSize :: Int
  }
  deriving (Generic, Show, Eq)
304
305
306 ----------------
307 -- | Metric | --
308 ----------------
309
310
-- | Metric constructors (a single metric is declared so far)
data Metric
  = BranchAge
  deriving (Generic, Show, Eq, Read)
313
314
315 ----------------
316 -- | Tagger | --
317 ----------------
318
319
-- | Tagger constructors
data Tagger
  = BranchPeakFreq
  | GroupLabelCooc
  | GroupDynamics
  deriving (Show,Generic,Read)
322
323
324 --------------
325 -- | Sort | --
326 --------------
327
328
-- | Sort constructors: the criterion used to sort
data Sort
  = ByBranchAge
  deriving (Generic, Show, Read, Enum, Bounded)

-- | Order constructors: ascending or descending
data Order
  = Asc
  | Desc
  deriving (Generic, Show, Read)
332
333
334 --------------------
335 -- | PhyloQuery | --
336 --------------------
337
338
-- | A PhyloQueryBuild describes a phylomemic reconstruction: title and
--   description, the shape of the periods, the clustering used for the
--   contextual unit, the inter-temporal matching, the re-branching
--   parameters and the clustering applied up to the last level.
data PhyloQueryBuild = PhyloQueryBuild
  { _q_phyloTitle                   :: Text
  , _q_phyloDesc                    :: Text

    -- Grain and Steps for the PhyloPeriods
  , _q_periodGrain                  :: Int
  , _q_periodSteps                  :: Int

    -- Clustering method for building the contextual unit of Phylo (ie: level 1)
  , _q_contextualUnit               :: Cluster
  , _q_contextualUnitMetrics        :: [Metric]
  , _q_contextualUnitFilters        :: [Filter]

    -- Inter-temporal matching method of the Phylo
  , _q_interTemporalMatching        :: Proximity
  , _q_interTemporalMatchingFrame   :: Int
  , _q_interTemporalMatchingFrameTh :: Double

    -- Re-branching parameters: a threshold and an nth value
  , _q_reBranchThr                  :: Double
  , _q_reBranchNth                  :: Int

    -- Last level of reconstruction
  , _q_nthLevel                     :: Level
    -- Clustering method used from level 1 to nthLevel
  , _q_nthCluster                   :: Cluster
  }
  deriving (Generic, Show, Eq)
366
-- | Filiation : to choose the Phylo edge you want to export : --> <-- <--> <=>
data Filiation
  = Ascendant
  | Descendant
  | Merge
  | Complete
  deriving (Generic, Show, Read)

-- | EdgeType : an edge lies either on the period axis or on the level axis
data EdgeType
  = PeriodEdge
  | LevelEdge
  deriving (Generic, Show, Eq)
370
371 -------------------
372 -- | PhyloView | --
373 -------------------
374
375
-- | A PhyloView is the output type of a Phylo: the parameters, a title and
--   a description, the filiation and level of the view, its periods, a map
--   of named metrics, and the branches, nodes and edges it is made of.
data PhyloView = PhyloView
  { _pv_param       :: PhyloParam
  , _pv_title       :: Text
  , _pv_description :: Text
  , _pv_filiation   :: Filiation
  , _pv_level       :: Level
  , _pv_periods     :: [PhyloPeriodId]
  , _pv_metrics     :: Map Text [Double]
  , _pv_branches    :: [PhyloBranch]
  , _pv_nodes       :: [PhyloNode]
  , _pv_edges       :: [PhyloEdge]
  }
  deriving (Generic, Show)
389
-- | A PhyloView is made of PhyloBranches, edges and nodes.
--   PhyloBranch : an identifier, a peak (as 'Text') and a map of named metrics
data PhyloBranch = PhyloBranch
  { _pb_id      :: PhyloBranchId
  , _pb_peak    :: Text
  , _pb_metrics :: Map Text [Double]
  }
  deriving (Generic, Show)
396
-- | PhyloEdge : a typed, weighted edge from a source group to a target
--   group, both referenced by their 'PhyloGroupId'
data PhyloEdge = PhyloEdge
  { _pe_source :: PhyloGroupId
  , _pe_target :: PhyloGroupId
  , _pe_type   :: EdgeType
  , _pe_weight :: Weight
  }
  deriving (Generic, Show)
403
-- | PhyloNode : a node of a PhyloView. It carries a group identifier, an
--   optional branch identifier, a label, a list of 'Int' indexes, optional
--   ngrams, a map of named metrics, optional parent identifiers and its
--   child nodes (the type is recursive through '_pn_childs').
data PhyloNode = PhyloNode
  { _pn_id      :: PhyloGroupId
  , _pn_bid     :: Maybe PhyloBranchId
  , _pn_label   :: Text
  , _pn_idx     :: [Int]
  , _pn_ngrams  :: Maybe [Ngrams]
  , _pn_metrics :: Map Text [Double]
  , _pn_parents :: Maybe [PhyloGroupId]
  , _pn_childs  :: [PhyloNode]
  }
  deriving (Generic, Show)
414
415 ------------------------
416 -- | PhyloQueryView | --
417 ------------------------
418
419
-- | ExportMode : the available export formats
data ExportMode
  = Json
  | Dot
  | Svg
  deriving (Generic, Show, Read)

-- | DisplayMode : the available display modes
data DisplayMode
  = Flat
  | Nested
  deriving (Generic, Show, Read)
424
-- | A PhyloQueryView describes a Phylo as an output view: the level to
--   look at, the filiation to keep, whether (and how deep) level childs are
--   included, the metrics / filters / taggers to apply, an optional sort and
--   the export and display options.
--
--   The type derives 'Generic' and 'Show' like the other records of this
--   module, so that a query can be printed and handled generically.
data PhyloQueryView = PhyloQueryView
  { _qv_lvl              :: Level

    -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
    -- Original note, translated from French: "Complete is redundant, and
    -- Merge (with the max)"
    -- NOTE(review): the exact meaning of that note is unclear -- confirm
  , _qv_filiation        :: Filiation

    -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
  , _qv_levelChilds      :: Bool
  , _qv_levelChildsDepth :: Level

    -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
    -- Firstly the metrics, then the filters and the taggers
  , _qv_metrics          :: [Metric]
  , _qv_filters          :: [Filter]
  , _qv_taggers          :: [Tagger]

    -- An asc or desc sort to apply to the PhyloGraph
  , _qv_sort             :: Maybe (Sort,Order)

    -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
  , _qv_export           :: ExportMode
  , _qv_display          :: DisplayMode
  , _qv_verbose          :: Bool
  }
  deriving (Generic, Show)
450
451
452 ----------------
453 -- | Lenses | --
454 ----------------
455
456
-- Template Haskell splices calling 'makeLenses' on the types of this module.
-- The record fields above all start with an underscore, which is the naming
-- convention 'makeLenses' relies on to name the lenses it generates.

-- Global parameters
makeLenses ''PhyloParam
makeLenses ''Software
--
-- Core Phylo structure
makeLenses ''Phylo
makeLenses ''PhyloFoundations
makeLenses ''PhyloGroup
makeLenses ''PhyloLevel
makeLenses ''PhyloPeriod
makeLenses ''PhyloFis
--
-- NOTE(review): Proximity, Cluster and Filter are sum types whose
-- constructors declare no record fields, so these three splices presumably
-- generate no lens at all -- confirm whether prisms were intended instead.
makeLenses ''Proximity
makeLenses ''Cluster
makeLenses ''Filter
--
-- Queries
makeLenses ''PhyloQueryBuild
makeLenses ''PhyloQueryView
--
-- Output view
makeLenses ''PhyloView
makeLenses ''PhyloBranch
makeLenses ''PhyloNode
makeLenses ''PhyloEdge
478
479
480 ------------------------
481 -- | JSON instances | --
482 ------------------------
483
484
-- Template Haskell splices deriving the JSON instances of the types above.
-- Record types go through 'unPrefix' with the prefix shared by their fields
-- (presumably so that the prefix is dropped from the JSON keys -- verify
-- against Gargantext.Core.Utils.Prefix); constructor-only types use
-- 'defaultOptions'.
-- NOTE(review): no instance is derived in this section for Document, Tagger,
-- Sort, Order, ExportMode, DisplayMode or PhyloQueryView -- confirm this is
-- intended.

-- Core Phylo structure
$(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
$(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
$(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
$(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
--
-- Global parameters
$(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
--
-- Constructor-only types
$(deriveJSON defaultOptions ''Filter )
$(deriveJSON defaultOptions ''Metric )
$(deriveJSON defaultOptions ''Cluster )
$(deriveJSON defaultOptions ''Proximity )
--
-- Clustering and proximity parameters
$(deriveJSON (unPrefix "_fis_" ) ''FisParams )
$(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
--
-- Filter parameters
$(deriveJSON (unPrefix "_lb_" ) ''LBParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
--
-- Build query and output view
$(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
$(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
$(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
$(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
$(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )

-- Edge selection
$(deriveJSON defaultOptions ''Filiation )
$(deriveJSON defaultOptions ''EdgeType )
517
518
519 ----------------------------
520 -- | TODO XML instances | --
521 ----------------------------
522