]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
phyloQuality
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 A phylomemy can be described as a temporal graph of groups of ngrams
13 (terms and multi-terms), observed at different scales of granularity.
14
15 The main type is Phylo, which is a synonym of Phylomemy (the only
16 difference is the number of characters).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 import Control.DeepSeq
48
49 --------------------
50 -- | PhyloParam | --
51 --------------------
52
53
-- | Global parameters attached to a 'Phylo'.
data PhyloParam = PhyloParam
  { _phyloParam_version  :: Text
    -- ^ Version, kept as text (the original note asks whether a Double would fit).
  , _phyloParam_software :: Software
    -- ^ Software parameters.
  , _phyloParam_query    :: PhyloQueryBuild
    -- ^ The build query stored alongside the phylo.
  }
  deriving (Generic, Show, Eq)
60
61
-- | Software parameters: a name and a version, both as text.
data Software = Software
  { _software_name    :: Text
  , _software_version :: Text
  }
  deriving (Generic, Show, Eq)
67
68
69 ---------------
70 -- | Phylo | --
71 ---------------
72
73
-- | The datatype of a phylomemy.
--
--   * duration    : time segment covered by the whole phylo
--   * foundations : all the ngrams contained in the phylo
--                   (built from a list of actants)
--   * periods     : list of all the periods of the phylo
--
-- The @cooc@ and @fis@ fields are declared strict.
data Phylo = Phylo
  { _phylo_duration    :: (Start, End)
  , _phylo_foundations :: PhyloFoundations
  , _phylo_periods     :: [PhyloPeriod]
  , _phylo_docsByYears :: Map Date Double
  , _phylo_cooc        :: !(Map Date (Map (Int, Int) Double))
  , _phylo_fis         :: !(Map (Date, Date) [PhyloFis])
  , _phylo_param       :: PhyloParam
  }
  deriving (Generic, Show, Eq)
88
89
-- | Foundations of a phylomemy, created from a given 'TermList'.
data PhyloFoundations = PhyloFoundations
  { _phylo_foundationsRoots     :: Vector Ngrams
    -- ^ The ngrams of the phylo, as a vector.
  , _phylo_foundationsTermsList :: TermList
    -- ^ The term list the foundations were created from.
  }
  deriving (Generic, Show, Eq)
95
96
-- | A date, encoded as a plain 'Int'.
type Date = Int

-- | Start and end of a time span.
--
-- Both are plain 'Date's. A representation as UTC time in seconds since the
-- UNIX epoch was considered and is kept here, disabled:
--
-- > type Start = POSIXTime
-- > type End   = POSIXTime
type Start = Date
type End   = Date
105
106
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
110
111
-- | One step of a phylomemy on the temporal axis.
--
--   * periodId     : the (start date, end date) tuple identifying the step
--   * periodLevels : the levels of granularity held by this step
data PhyloPeriod = PhyloPeriod
  { _phylo_periodId     :: PhyloPeriodId
  , _phylo_periodLevels :: [PhyloLevel]
  }
  deriving (Generic, Show, Eq)
120
121
122 --------------------
123 -- | PhyloLevel | --
124 --------------------
125
126
-- | One level of a phylomemy on the level (granularity) axis.
--
-- Description of the levels:
--
--   * Level -1 : an ngram equals itself (by identity) == _phylo_Ngrams
--   * Level  0 : group of synonyms (by stems + by qualitative expert meaning)
--   * Level  1 : first level of clustering
--   * Level  N : Nth level of clustering
data PhyloLevel = PhyloLevel
  { _phylo_levelId     :: PhyloLevelId
  , _phylo_levelGroups :: [PhyloGroup]
  }
  deriving (Generic, Show, Eq)
138
139
140 --------------------
141 -- | PhyloGroup | --
142 --------------------
143
144
-- | A group of ngrams at a given level and step.
--
--   * label                : label of the group, as text
--   * ngrams               : the terms that build the group, stored as a list of 'Int'
--                            (NOTE(review): presumably indices into the foundations — confirm)
--   * ngramsMeta / meta    : maps of named measures (support, etc.) that depict
--                            some qualitative aspects of a phylo
--   * branchId             : the branch of the group, when it has one
--   * cooc                 : weights keyed by pairs of 'Int' (strict field)
--   * period parents|childs: weighted links on the temporal (period) axis
--   * level parents|childs : weighted links on the level (granularity) axis
--
-- Pointers are directed links from the group itself to any PhyloGroup
-- (the original note leaves open whether self-links are excluded).
data PhyloGroup = PhyloGroup
  { _phylo_groupId            :: PhyloGroupId
  , _phylo_groupLabel         :: Text
  , _phylo_groupNgrams        :: [Int]
  , _phylo_groupNgramsMeta    :: Map Text [Double]
  , _phylo_groupMeta          :: Map Text Double
  , _phylo_groupBranchId      :: Maybe PhyloBranchId
  , _phylo_groupCooc          :: !(Map (Int, Int) Double)

    -- links along the temporal axis
  , _phylo_groupPeriodParents :: [Pointer]
  , _phylo_groupPeriodChilds  :: [Pointer]

    -- links along the granularity axis
  , _phylo_groupLevelParents  :: [Pointer]
  , _phylo_groupLevelChilds   :: [Pointer]
  }
  deriving (Generic, NFData, Show, Eq, Ord)
168
169 -- instance NFData PhyloGroup
170
171
-- | A level of aggregation: -1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster.
type Level = Int

-- | A generic index of an element (PhyloGroup, PhyloBranch, etc.) in a given list.
type Index = Int
176
177
-- | Identifiers. Each one nests the previous one, except for branches:
-- a period is a (start, end) pair, a level adds a 'Level' to a period,
-- a group adds an 'Index' to a level, and a branch is a (level, index) pair.
type PhyloPeriodId = (Start, End)
type PhyloLevelId  = (PhyloPeriodId, Level)
type PhyloGroupId  = (PhyloLevelId, Index)
type PhyloBranchId = (Level, Index)
182
183
-- | A generic measure that can be associated with an Id.
type Weight = Double

-- | A weighted link to a given PhyloGroup.
type Pointer = (PhyloGroupId, Weight)

-- | A contiguous sequence of n terms.
type Ngrams = Text
190
191
192 --------------------
193 -- | Aggregates | --
194 --------------------
195
196
-- | A piece of text linked to a date.
data Document = Document
  { date :: Date      -- ^ the date the text is linked to
  , text :: [Ngrams]  -- ^ the text, as a list of ngrams
  }
  deriving (Show, Generic, NFData)
202
-- | A set of ngrams cooccurring in the same Document.
type Clique = Set Ngrams

-- | The number of Documents where a Clique occurs.
type Support = Int
-- | Fis: a frequent item set, i.e. the association between a 'Clique' and
-- a 'Support', together with the (date, date) period it is attached to.
data PhyloFis = PhyloFis
  { _phyloFis_clique  :: Clique
  , _phyloFis_support :: Support
  , _phyloFis_period  :: (Date, Date)
  }
  deriving (Generic, NFData, Show, Eq)
213
-- | A cluster is a list of clustered PhyloGroups.
type PhyloCluster = [PhyloGroup]
216
217
-- | A node of a graph of groups: a PhyloGroup.
type GroupNode = PhyloGroup

-- | A weighted link between two PhyloGroups in a graph.
type GroupEdge = ((PhyloGroup, PhyloGroup), Weight)

-- | A graph of groups: a list of nodes paired with a list of edges.
type GroupGraph = ([GroupNode], [GroupEdge])
224
225
226 ---------------
227 -- | Error | --
228 ---------------
229
230
-- | Errors about the levels of a phylo.
data PhyloError
  = LevelDoesNotExist
  | LevelUnassigned
  deriving (Show)
234
235
236 -----------------
237 -- | Cluster | --
238 -----------------
239
240
-- | The available clustering methods, each carrying its own parameters.
data Cluster
  = Fis FisParams
  | RelatedComponents RCParams
  | Louvain LouvainParams
  deriving (Generic, Show, Eq, Read)
246
-- | Parameters of the Fis clustering.
data FisParams = FisParams
  { _fis_keepMinorFis :: Bool
  , _fis_minSupport   :: Support
  , _fis_minSize      :: Int
  }
  deriving (Generic, Show, Eq, Read)
253
-- | Parameters of the RelatedComponents clustering: the proximity to use.
data RCParams = RCParams
  { _rc_proximity :: Proximity
  }
  deriving (Generic, Show, Eq, Read)
257
-- | Parameters of the Louvain clustering: the proximity to use.
data LouvainParams = LouvainParams
  { _louvain_proximity :: Proximity
  }
  deriving (Generic, Show, Eq, Read)
261
262
263 -------------------
264 -- | Proximity | --
265 -------------------
266
267
-- | The available proximity measures. 'WeightedLogJaccard' and 'Hamming'
-- carry parameters; the @Filiation@ constructor carries none.
data Proximity
  = WeightedLogJaccard WLJParams
  | Hamming HammingParams
  | Filiation
  deriving (Generic, Show, Eq, Read)
273
-- | Parameters of the WeightedLogJaccard proximity.
data WLJParams = WLJParams
  { _wlj_threshold   :: Double
  , _wlj_sensibility :: Double
  }
  deriving (Generic, Show, Eq, Read)
279
-- | Parameters of the Hamming proximity.
data HammingParams = HammingParams
  { _hamming_threshold :: Double
  }
  deriving (Generic, Show, Eq, Read)
283
284
285 ----------------
286 -- | Filter | --
287 ----------------
288
289
-- | The available filters, each carrying its own parameters.
--
-- Like the other query-parameter types of this module ('Cluster',
-- 'Proximity', 'Metric', ...), a 'Filter' can be parsed back from its
-- 'Show' representation through 'Read'.
data Filter
  = LonelyBranch LBParams
  | SizeBranch SBParams
  deriving (Generic, Show, Eq, Read)

-- | Parameters of the LonelyBranch filter.
data LBParams = LBParams
  { _lb_periodsInf :: Int
  , _lb_periodsSup :: Int
  , _lb_minNodes   :: Int
  }
  deriving (Generic, Show, Eq, Read)

-- | Parameters of the SizeBranch filter.
data SBParams = SBParams
  { _sb_minSize :: Int
  }
  deriving (Generic, Show, Eq, Read)
304
305
306 ----------------
307 -- | Metric | --
308 ----------------
309
310
-- | The available metrics.
data Metric
  = BranchAge
  | BranchBirth
  | BranchGroups
  deriving (Generic, Show, Eq, Read)
313
314
315 ----------------
316 -- | Tagger | --
317 ----------------
318
319
-- | The available taggers: three branch-peak taggers followed by three
-- group-label taggers.
data Tagger
  = BranchPeakFreq
  | BranchPeakCooc
  | BranchPeakInc
  | GroupLabelCooc
  | GroupLabelInc
  | GroupLabelIncDyn
  deriving (Show, Generic, Read)
323
324
325 --------------
326 -- | Sort | --
327 --------------
328
329
-- | The available sort criteria.
data Sort
  = ByBranchAge
  | ByBranchBirth
  deriving (Generic, Show, Read, Enum, Bounded)

-- | The direction of a sort: ascending or descending.
data Order
  = Asc
  | Desc
  deriving (Generic, Show, Read)
333
334
335 --------------------
336 -- | PhyloQuery | --
337 --------------------
338
339
-- | A PhyloQueryBuild describes a phylomemic reconstruction.
data PhyloQueryBuild = PhyloQueryBuild
  { _q_phyloTitle                   :: Text
  , _q_phyloDesc                    :: Text

    -- Grain and steps of the PhyloPeriods
  , _q_periodGrain                  :: Int
  , _q_periodSteps                  :: Int

    -- Clustering method that builds the contextual unit of the Phylo
    -- (i.e. level 1), with its metrics and filters
  , _q_contextualUnit               :: Cluster
  , _q_contextualUnitMetrics        :: [Metric]
  , _q_contextualUnitFilters        :: [Filter]

    -- Inter-temporal matching method of the Phylo
  , _q_interTemporalMatching        :: Proximity
  , _q_interTemporalMatchingFrame   :: Int
  , _q_interTemporalMatchingFrameTh :: Double

  , _q_reBranchThr                  :: Double
  , _q_reBranchNth                  :: Int

    -- Last level of the reconstruction
  , _q_nthLevel                     :: Level
    -- Clustering method used from level 1 up to nthLevel
  , _q_nthCluster                   :: Cluster
  }
  deriving (Generic, Show, Eq)
367
-- | The kind of Phylo edge to export. The original note lists the arrows
-- @-->  <--  <-->  <=>@
-- (NOTE(review): presumably in constructor order — confirm).
data Filiation
  = Ascendant
  | Descendant
  | Merge
  | Complete
  deriving (Generic, Show, Read)

-- | The axis an edge belongs to: period or level.
data EdgeType
  = PeriodEdge
  | LevelEdge
  deriving (Generic, Show, Eq)
371
372 -------------------
373 -- | PhyloView | --
374 -------------------
375
376
-- | A PhyloView is the output type of a Phylo: its parameters, a title and
-- a description, the filiation and level of the view, and the periods,
-- metrics, branches, nodes and edges it contains.
data PhyloView = PhyloView
  { _pv_param       :: PhyloParam
  , _pv_title       :: Text
  , _pv_description :: Text
  , _pv_filiation   :: Filiation
  , _pv_level       :: Level
  , _pv_periods     :: [PhyloPeriodId]
  , _pv_metrics     :: Map Text [Double]
  , _pv_branches    :: [PhyloBranch]
  , _pv_nodes       :: [PhyloNode]
  , _pv_edges       :: [PhyloEdge]
  }
  deriving (Generic, Show)
390
-- | A branch of a PhyloView (a view is made of branches, edges and nodes):
-- an id, a peak as text and a map of named metrics.
data PhyloBranch = PhyloBranch
  { _pb_id      :: PhyloBranchId
  , _pb_peak    :: Text
  , _pb_metrics :: Map Text [Double]
  }
  deriving (Generic, Show)
397
-- | An edge of a PhyloView: a weighted, typed link from a source group id
-- to a target group id.
data PhyloEdge = PhyloEdge
  { _pe_source :: PhyloGroupId
  , _pe_target :: PhyloGroupId
  , _pe_type   :: EdgeType
  , _pe_weight :: Weight
  }
  deriving (Generic, Show)
404
-- | A node of a PhyloView. The type is recursive: a node holds its child
-- nodes directly, while its parents (when present) are referenced by id.
data PhyloNode = PhyloNode
  { _pn_id      :: PhyloGroupId
  , _pn_bid     :: Maybe PhyloBranchId
  , _pn_label   :: Text
  , _pn_idx     :: [Int]
  , _pn_ngrams  :: Maybe [Ngrams]
  , _pn_metrics :: Map Text [Double]
  , _pn_cooc    :: Map (Int, Int) Double
  , _pn_parents :: Maybe [PhyloGroupId]
  , _pn_childs  :: [PhyloNode]
  }
  deriving (Generic, Show)
416
417 ------------------------
418 -- | PhyloQueryView | --
419 ------------------------
420
421
-- | The export formats of a view.
data ExportMode
  = Json
  | Dot
  | Svg
  deriving (Generic, Show, Read)

-- | The display modes of a view.
data DisplayMode
  = Flat
  | Nested
  deriving (Generic, Show, Read)
426
-- | A PhyloQueryView describes a Phylo as an output view.
--
-- The type derives 'Generic' and 'Show' so that a query can be printed,
-- in line with the other record types declared in this module.
data PhyloQueryView = PhyloQueryView
  { _qv_lvl              :: Level

    -- Does the PhyloGraph contain an ascendant, a descendant or a complete
    -- filiation?
    -- NOTE(review): the original French note read roughly "Complete is
    -- redundant, and merge (with the max)" — confirm the intended meaning.
  , _qv_filiation        :: Filiation

    -- Does the PhyloGraph contain some levelChilds? How deep must it go?
  , _qv_levelChilds      :: Bool
  , _qv_levelChildsDepth :: Level

    -- Ordered lists of filters, taggers and metrics to be applied to the
    -- PhyloGraph: first the metrics, then the filters and the taggers
  , _qv_metrics          :: [Metric]
  , _qv_filters          :: [Filter]
  , _qv_taggers          :: [Tagger]

    -- An ascending or descending sort to apply to the PhyloGraph
  , _qv_sort             :: Maybe (Sort, Order)

    -- A display mode to apply to the PhyloGraph,
    -- i.e. [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
  , _qv_export           :: ExportMode
  , _qv_display          :: DisplayMode
  , _qv_verbose          :: Bool
  }
  deriving (Generic, Show)
452
453
454 ----------------
455 -- | Lenses | --
456 ----------------
457
458
-- Lens generation, written as explicit Template Haskell splices.

-- Parameters
$(makeLenses ''PhyloParam)
$(makeLenses ''Software)

-- Core phylo structure
$(makeLenses ''Phylo)
$(makeLenses ''PhyloFoundations)
$(makeLenses ''PhyloGroup)
$(makeLenses ''PhyloLevel)
$(makeLenses ''PhyloPeriod)
$(makeLenses ''PhyloFis)

-- NOTE(review): Proximity, Cluster and Filter declare no record fields, so
-- makeLenses presumably generates nothing for them — confirm whether prisms
-- were intended instead.
$(makeLenses ''Proximity)
$(makeLenses ''Cluster)
$(makeLenses ''Filter)

-- Queries
$(makeLenses ''PhyloQueryBuild)
$(makeLenses ''PhyloQueryView)

-- Views
$(makeLenses ''PhyloView)
$(makeLenses ''PhyloBranch)
$(makeLenses ''PhyloNode)
$(makeLenses ''PhyloEdge)
480
481
482 ------------------------
483 -- | JSON instances | --
484 ------------------------
485
486
-- JSON instances, generated with Template Haskell.
-- NOTE(review): unPrefix presumably builds aeson options that drop the given
-- prefix from the record field names — confirm against
-- Gargantext.Core.Utils.Prefix.

-- Core phylo structure
$(deriveJSON (unPrefix "_phylo_")            ''Phylo)
$(deriveJSON (unPrefix "_phylo_foundations") ''PhyloFoundations)
$(deriveJSON (unPrefix "_phylo_period")      ''PhyloPeriod)
$(deriveJSON (unPrefix "_phylo_level")       ''PhyloLevel)
$(deriveJSON (unPrefix "_phylo_group")       ''PhyloGroup)
$(deriveJSON (unPrefix "_phyloFis_")         ''PhyloFis)

-- Parameters
$(deriveJSON (unPrefix "_software_")   ''Software)
$(deriveJSON (unPrefix "_phyloParam_") ''PhyloParam)

-- Sum types, with aeson's default options
$(deriveJSON defaultOptions ''Filter)
$(deriveJSON defaultOptions ''Metric)
$(deriveJSON defaultOptions ''Cluster)
$(deriveJSON defaultOptions ''Proximity)

-- Clustering and proximity parameters
$(deriveJSON (unPrefix "_fis_")     ''FisParams)
$(deriveJSON (unPrefix "_hamming_") ''HammingParams)
$(deriveJSON (unPrefix "_louvain_") ''LouvainParams)
$(deriveJSON (unPrefix "_rc_")      ''RCParams)
$(deriveJSON (unPrefix "_wlj_")     ''WLJParams)

-- Filter parameters
$(deriveJSON (unPrefix "_lb_") ''LBParams)
$(deriveJSON (unPrefix "_sb_") ''SBParams)

-- Build query and views
$(deriveJSON (unPrefix "_q_")  ''PhyloQueryBuild)
$(deriveJSON (unPrefix "_pv_") ''PhyloView)
$(deriveJSON (unPrefix "_pb_") ''PhyloBranch)
$(deriveJSON (unPrefix "_pe_") ''PhyloEdge)
$(deriveJSON (unPrefix "_pn_") ''PhyloNode)

$(deriveJSON defaultOptions ''Filiation)
$(deriveJSON defaultOptions ''EdgeType)
519
520
521 ----------------------------
522 -- | TODO XML instances | --
523 ----------------------------
524