]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
add the phylopeaks and correct some bugs on the phylodocs
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45
46 --------------------
47 -- | PhyloParam | --
48 --------------------
49
50
51 -- | Global parameters of a Phylo
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQuery
56 } deriving (Generic, Show)
57
58
59 -- | Software parameters
60 data Software =
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show)
64
65
66 ---------------
67 -- | Phylo | --
68 ---------------
69
70
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
74 -- Periods : list of all the periods of a Phylo
75 data Phylo =
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 , _phylo_foundationsPeaks :: PhyloPeaks
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
81 }
82 deriving (Generic, Show)
83
84 -- | The PhyloPeaks describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
85 -- PeaksLabels are the root labels of each Ngrams trees
86 data PhyloPeaks =
87 PhyloPeaks { _phylo_peaksLabels :: Vector Ngrams
88 , _phylo_peaksForest :: [Tree Ngrams]
89 }
90 deriving (Generic, Show)
91
92 -- | A Tree of Ngrams where each node is a label
93 data Tree a = Empty | Node a [Tree a] deriving (Show)
94
95
96 -- | Date : a simple Integer
97 type Date = Int
98
99 -- | UTCTime in seconds since UNIX epoch
100 -- type Start = POSIXTime
101 -- type End = POSIXTime
102 type Start = Date
103 type End = Date
104
105
106 ---------------------
107 -- | PhyloPeriod | --
108 ---------------------
109
110
111 -- | PhyloStep : steps of phylomemy on temporal axis
112 -- Period: tuple (start date, end date) of the step of the phylomemy
113 -- Levels: levels of granularity
114 data PhyloPeriod =
115 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
116 , _phylo_periodLevels :: [PhyloLevel]
117 }
118 deriving (Generic, Show)
119
120
121 --------------------
122 -- | PhyloLevel | --
123 --------------------
124
125
126 -- | PhyloLevel : levels of phylomemy on level axis
127 -- Levels description:
128 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
129 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
130 -- Level 1: First level of clustering
131 -- Level N: Nth level of clustering
132 data PhyloLevel =
133 PhyloLevel { _phylo_levelId :: PhyloLevelId
134 , _phylo_levelGroups :: [PhyloGroup]
135 }
136 deriving (Generic, Show)
137
138
139 --------------------
140 -- | PhyloGroup | --
141 --------------------
142
143
144 -- | PhyloGroup : group of ngrams at each level and step
145 -- Label : maybe has a label as text
146 -- Ngrams: set of terms that build the group
147 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
148 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
149 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
150 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
151 data PhyloGroup =
152 PhyloGroup { _phylo_groupId :: PhyloGroupId
153 , _phylo_groupLabel :: Text
154 , _phylo_groupNgrams :: [Int]
155 , _phylo_groupMeta :: Map Text Double
156 , _phylo_groupCooc :: Map (Int, Int) Double
157 , _phylo_groupBranchId :: Maybe PhyloBranchId
158
159 , _phylo_groupPeriodParents :: [Pointer]
160 , _phylo_groupPeriodChilds :: [Pointer]
161
162 , _phylo_groupLevelParents :: [Pointer]
163 , _phylo_groupLevelChilds :: [Pointer]
164 }
165 deriving (Generic, Show, Eq, Ord)
166
167
168 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
169 type Level = Int
170 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
171 type Index = Int
172
173
174 type PhyloPeriodId = (Start, End)
175 type PhyloLevelId = (PhyloPeriodId, Level)
176 type PhyloGroupId = (PhyloLevelId, Index)
177 type PhyloBranchId = (Level, Index)
178
179
180 -- | Weight : A generic mesure that can be associated with an Id
181 type Weight = Double
182 -- | Pointer : A weighted linked with a given PhyloGroup
183 type Pointer = (PhyloGroupId, Weight)
184 -- | Ngrams : a contiguous sequence of n terms
185 type Ngrams = Text
186
187
188 --------------------
189 -- | Aggregates | --
190 --------------------
191
192
193 -- | Document : a piece of Text linked to a Date
194 data Document = Document
195 { date :: Date
196 , text :: [Ngrams]
197 } deriving (Show)
198
199 -- | Clique : Set of ngrams cooccurring in the same Document
200 type Clique = Set Ngrams
201 -- | Support : Number of Documents where a Clique occurs
202 type Support = Int
203 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
204 type PhyloFis = (Clique,Support)
205
206
207 -- | A list of clustered PhyloGroup
208 type PhyloCluster = [PhyloGroup]
209
210
211 -- | A List of PhyloGroup in a Graph
212 type GroupNodes = [PhyloGroup]
213 -- | A List of weighted links between some PhyloGroups in a Graph
214 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
215 -- | The association as a Graph between a list of Nodes and a list of Edges
216 type GroupGraph = (GroupNodes,GroupEdges)
217
218
219 ---------------
220 -- | Error | --
221 ---------------
222
223
224 data PhyloError = LevelDoesNotExist
225 | LevelUnassigned
226 deriving (Show)
227
228
229 -----------------
230 -- | Cluster | --
231 -----------------
232
233
234 -- | Cluster constructors
235 data Cluster = Fis FisParams
236 | RelatedComponents RCParams
237 | Louvain LouvainParams
238 deriving (Show)
239
240 -- | Parameters for Fis clustering
241 data FisParams = FisParams
242 { _fis_filtered :: Bool
243 , _fis_keepMinorFis :: Bool
244 , _fis_minSupport :: Support
245 } deriving (Show)
246
247 -- | Parameters for RelatedComponents clustering
248 data RCParams = RCParams
249 { _rc_proximity :: Proximity } deriving (Show)
250
251 -- | Parameters for Louvain clustering
252 data LouvainParams = LouvainParams
253 { _louvain_proximity :: Proximity } deriving (Show)
254
255
256 -------------------
257 -- | Proximity | --
258 -------------------
259
260
261 -- | Proximity constructors
262 data Proximity = WeightedLogJaccard WLJParams
263 | Hamming HammingParams
264 | Filiation
265 deriving (Show)
266
267 -- | Parameters for WeightedLogJaccard proximity
268 data WLJParams = WLJParams
269 { _wlj_threshold :: Double
270 , _wlj_sensibility :: Double
271 } deriving (Show)
272
273 -- | Parameters for Hamming proximity
274 data HammingParams = HammingParams
275 { _hamming_threshold :: Double } deriving (Show)
276
277
278 ----------------
279 -- | Filter | --
280 ----------------
281
282
283 -- | Filter constructors
284 data Filter = SmallBranch SBParams deriving (Show)
285
286 -- | Parameters for SmallBranch filter
287 data SBParams = SBParams
288 { _sb_periodsInf :: Int
289 , _sb_periodsSup :: Int
290 , _sb_minNodes :: Int } deriving (Show)
291
292
293 ----------------
294 -- | Metric | --
295 ----------------
296
297
298 -- | Metric constructors
299 data Metric = BranchAge deriving (Show)
300
301
302 ----------------
303 -- | Tagger | --
304 ----------------
305
306
307 -- | Tagger constructors
308 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
309
310
311 --------------
312 -- | Sort | --
313 --------------
314
315
316 -- | Sort constructors
317 data Sort = ByBranchAge deriving (Show)
318 data Order = Asc | Desc deriving (Show)
319
320
321 --------------------
322 -- | PhyloQuery | --
323 --------------------
324
325
326 -- | A Phyloquery describes a phylomemic reconstruction
327 data PhyloQuery = PhyloQuery
328 { _q_phyloTitle :: Text
329 , _q_phyloDesc :: Text
330
331 -- Grain and Steps for the PhyloPeriods
332 , _q_periodGrain :: Int
333 , _q_periodSteps :: Int
334
335 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
336 , _q_contextualUnit :: Cluster
337
338 -- Inter-temporal matching method of the Phylo
339 , _q_interTemporalMatching :: Proximity
340
341 -- Last level of reconstruction
342 , _q_nthLevel :: Level
343 -- Clustering method used from level 1 to nthLevel
344 , _q_nthCluster :: Cluster
345 } deriving (Show)
346
347 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
348 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Show)
349 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
350
351
352 -------------------
353 -- | PhyloView | --
354 -------------------
355
356
357 -- | A PhyloView is the output type of a Phylo
358 data PhyloView = PhyloView
359 { _phylo_viewParam :: PhyloParam
360 , _phylo_viewTitle :: Text
361 , _phylo_viewDescription :: Text
362 , _phylo_viewFiliation :: Filiation
363 , _phylo_viewMetrics :: Map Text [Double]
364 , _phylo_viewBranches :: [PhyloBranch]
365 , _phylo_viewNodes :: [PhyloNode]
366 , _phylo_viewEdges :: [PhyloEdge]
367 } deriving (Show)
368
369 -- | A phyloview is made of PhyloBranches, edges and nodes
370 data PhyloBranch = PhyloBranch
371 { _phylo_branchId :: PhyloBranchId
372 , _phylo_branchLabel :: Text
373 , _phylo_branchMetrics :: Map Text [Double]
374 } deriving (Show)
375
376 data PhyloEdge = PhyloEdge
377 { _phylo_edgeSource :: PhyloGroupId
378 , _phylo_edgeTarget :: PhyloGroupId
379 , _phylo_edgeType :: EdgeType
380 , _phylo_edgeWeight :: Weight
381 } deriving (Show)
382
383 data PhyloNode = PhyloNode
384 { _phylo_nodeId :: PhyloGroupId
385 , _phylo_nodeBranchId :: Maybe PhyloBranchId
386 , _phylo_nodeLabel :: Text
387 , _phylo_nodeNgramsIdx :: [Int]
388 , _phylo_nodeNgrams :: Maybe [Ngrams]
389 , _phylo_nodeMetrics :: Map Text [Double]
390 , _phylo_nodeLevelParents :: Maybe [PhyloGroupId]
391 , _phylo_nodeLevelChilds :: [PhyloNode]
392 } deriving (Show)
393
394
395 ------------------------
396 -- | PhyloQueryView | --
397 ------------------------
398
399
400 data DisplayMode = Flat | Nested
401
402 -- | A PhyloQueryView describes a Phylo as an output view
403 data PhyloQueryView = PhyloQueryView
404 { _qv_lvl :: Level
405
406 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
407 , _qv_filiation :: Filiation
408
409 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
410 , _qv_levelChilds :: Bool
411 , _qv_levelChildsDepth :: Level
412
413 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
414 -- Firstly the metrics, then the filters and the taggers
415 , _qv_metrics :: [Metric]
416 , _qv_filters :: [Filter]
417 , _qv_taggers :: [Tagger]
418
419 -- An asc or desc sort to apply to the PhyloGraph
420 , _qv_sort :: Maybe (Sort,Order)
421
422 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
423 , _qv_display :: DisplayMode
424 , _qv_verbose :: Bool
425 }
426
427
428 ----------------
429 -- | Lenses | --
430 ----------------
431
432
433 makeLenses ''PhyloParam
434 makeLenses ''Software
435 --
436 makeLenses ''Phylo
437 makeLenses ''PhyloPeaks
438 makeLenses ''PhyloGroup
439 makeLenses ''PhyloLevel
440 makeLenses ''PhyloPeriod
441 --
442 makeLenses ''Proximity
443 makeLenses ''Cluster
444 makeLenses ''Filter
445 --
446 makeLenses ''PhyloQuery
447 makeLenses ''PhyloQueryView
448 --
449 makeLenses ''PhyloView
450 makeLenses ''PhyloBranch
451 makeLenses ''PhyloNode
452 makeLenses ''PhyloEdge
453
454
455 ------------------------
456 -- | JSON instances | --
457 ------------------------
458
459
460 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
461 $(deriveJSON (unPrefix "_phylo_peaks" ) ''PhyloPeaks )
462 $(deriveJSON defaultOptions ''Tree )
463 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
464 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
465 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
466 --
467 $(deriveJSON (unPrefix "_software_" ) ''Software )
468 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
469 --
470 $(deriveJSON defaultOptions ''Cluster )
471 $(deriveJSON defaultOptions ''Proximity )
472 --
473 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
474 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
475 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
476 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
477 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
478 --
479 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
480
481
482 ----------------------------
483 -- | TODO XML instances | --
484 ----------------------------
485