]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
adapt Main to example
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 --import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 --import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 --------------------
46 -- | PhyloParam | --
47 --------------------
48
49
50 -- | Global parameters of a Phylo
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_query :: PhyloQueryBuild
55 } deriving (Generic, Show, Eq)
56
57
58 -- | Software parameters
59 data Software =
60 Software { _software_name :: Text
61 , _software_version :: Text
62 } deriving (Generic, Show, Eq)
63
64
65 ---------------
66 -- | Phylo | --
67 ---------------
68
69
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
74 data Phylo =
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: Vector Ngrams
77 , _phylo_foundationsRoots :: PhyloRoots
78 , _phylo_periods :: [PhyloPeriod]
79 , _phylo_param :: PhyloParam
80 }
81 deriving (Generic, Show, Eq)
82
83 -- | The PhyloRoots describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
84 -- PeaksLabels are the root labels of each Ngrams trees
85 data PhyloRoots =
86 PhyloRoots { _phylo_rootsLabels :: Vector Ngrams
87 , _phylo_rootsForest :: [Tree Ngrams]
88 }
89 deriving (Generic, Show, Eq)
90
91 -- | A Tree of Ngrams where each node is a label
92 data Tree a = Empty | Node a [Tree a] deriving (Show, Eq)
93
94
95 -- | Date : a simple Integer
96 type Date = Int
97
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
101 type Start = Date
102 type End = Date
103
104
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
108
109
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
113 data PhyloPeriod =
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
116 }
117 deriving (Generic, Show, Eq)
118
119
120 --------------------
121 -- | PhyloLevel | --
122 --------------------
123
124
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
131 data PhyloLevel =
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
134 }
135 deriving (Generic, Show, Eq)
136
137
138 --------------------
139 -- | PhyloGroup | --
140 --------------------
141
142
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 data PhyloGroup =
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupCooc :: Map (Int, Int) Double
156 , _phylo_groupBranchId :: Maybe PhyloBranchId
157
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
160
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
163 }
164 deriving (Generic, Show, Eq, Ord)
165
166
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 type Level = Int
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
170 type Index = Int
171
172
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
177
178
179 -- | Weight : A generic mesure that can be associated with an Id
180 type Weight = Double
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
184 type Ngrams = Text
185
186
187 --------------------
188 -- | Aggregates | --
189 --------------------
190
191
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
194 { date :: Date
195 , text :: [Ngrams]
196 } deriving (Show,Generic)
197
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
201 type Support = Int
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
207 } deriving (Show)
208
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
211
212
213 -- | A List of PhyloGroup in a Graph
214 type GroupNodes = [PhyloGroup]
215 -- | A List of weighted links between some PhyloGroups in a Graph
216 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = (GroupNodes,GroupEdges)
219
220
221 ---------------
222 -- | Error | --
223 ---------------
224
225
226 data PhyloError = LevelDoesNotExist
227 | LevelUnassigned
228 deriving (Show)
229
230
231 -----------------
232 -- | Cluster | --
233 -----------------
234
235
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq)
241
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 } deriving (Generic, Show, Eq)
247
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq)
251
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq)
255
256
257 -------------------
258 -- | Proximity | --
259 -------------------
260
261
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
265 | Filiation
266 deriving (Generic, Show, Eq)
267
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq)
273
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq)
277
278
279 ----------------
280 -- | Filter | --
281 ----------------
282
283
284 -- | Filter constructors
285 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
286
287 -- | Parameters for SmallBranch filter
288 data SBParams = SBParams
289 { _sb_periodsInf :: Int
290 , _sb_periodsSup :: Int
291 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
292
293
294 ----------------
295 -- | Metric | --
296 ----------------
297
298
299 -- | Metric constructors
300 data Metric = BranchAge deriving (Generic, Show, Eq)
301
302
303 ----------------
304 -- | Tagger | --
305 ----------------
306
307
308 -- | Tagger constructors
309 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show)
310
311
312 --------------
313 -- | Sort | --
314 --------------
315
316
317 -- | Sort constructors
318 data Sort = ByBranchAge deriving (Generic, Show)
319 data Order = Asc | Desc deriving (Generic, Show)
320
321
322 --------------------
323 -- | PhyloQuery | --
324 --------------------
325
326
327 -- | A Phyloquery describes a phylomemic reconstruction
328 data PhyloQueryBuild = PhyloQueryBuild
329 { _q_phyloTitle :: Text
330 , _q_phyloDesc :: Text
331
332 -- Grain and Steps for the PhyloPeriods
333 , _q_periodGrain :: Int
334 , _q_periodSteps :: Int
335
336 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
337 , _q_contextualUnit :: Cluster
338 , _q_contextualUnitMetrics :: [Metric]
339 , _q_contextualUnitFilters :: [Filter]
340
341 -- Inter-temporal matching method of the Phylo
342 , _q_interTemporalMatching :: Proximity
343
344 -- Last level of reconstruction
345 , _q_nthLevel :: Level
346 -- Clustering method used from level 1 to nthLevel
347 , _q_nthCluster :: Cluster
348 } deriving (Generic, Show, Eq)
349
350 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
351 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show)
352 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
353
354 -------------------
355 -- | PhyloView | --
356 -------------------
357
358
359 -- | A PhyloView is the output type of a Phylo
360 data PhyloView = PhyloView
361 { _pv_param :: PhyloParam
362 , _pv_title :: Text
363 , _pv_description :: Text
364 , _pv_filiation :: Filiation
365 , _pv_level :: Level
366 , _pv_periods :: [PhyloPeriodId]
367 , _pv_metrics :: Map Text [Double]
368 , _pv_branches :: [PhyloBranch]
369 , _pv_nodes :: [PhyloNode]
370 , _pv_edges :: [PhyloEdge]
371 } deriving (Generic, Show)
372
373 -- | A phyloview is made of PhyloBranches, edges and nodes
374 data PhyloBranch = PhyloBranch
375 { _pb_id :: PhyloBranchId
376 , _pb_peak :: Text
377 , _pb_metrics :: Map Text [Double]
378 } deriving (Generic, Show)
379
380 data PhyloEdge = PhyloEdge
381 { _pe_source :: PhyloGroupId
382 , _pe_target :: PhyloGroupId
383 , _pe_type :: EdgeType
384 , _pe_weight :: Weight
385 } deriving (Generic, Show)
386
387 data PhyloNode = PhyloNode
388 { _pn_id :: PhyloGroupId
389 , _pn_bid :: Maybe PhyloBranchId
390 , _pn_label :: Text
391 , _pn_idx :: [Int]
392 , _pn_ngrams :: Maybe [Ngrams]
393 , _pn_metrics :: Map Text [Double]
394 , _pn_parents :: Maybe [PhyloGroupId]
395 , _pn_childs :: [PhyloNode]
396 } deriving (Generic, Show)
397
398 ------------------------
399 -- | PhyloQueryView | --
400 ------------------------
401
402
403 data ExportMode = Json | Dot | Svg
404 data DisplayMode = Flat | Nested
405
406 -- | A PhyloQueryView describes a Phylo as an output view
407 data PhyloQueryView = PhyloQueryView
408 { _qv_lvl :: Level
409
410 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
411 , _qv_filiation :: Filiation
412
413 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
414 , _qv_levelChilds :: Bool
415 , _qv_levelChildsDepth :: Level
416
417 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
418 -- Firstly the metrics, then the filters and the taggers
419 , _qv_metrics :: [Metric]
420 , _qv_filters :: [Filter]
421 , _qv_taggers :: [Tagger]
422
423 -- An asc or desc sort to apply to the PhyloGraph
424 , _qv_sort :: Maybe (Sort,Order)
425
426 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
427 , _qv_export :: ExportMode
428 , _qv_display :: DisplayMode
429 , _qv_verbose :: Bool
430 }
431
432
433 ----------------
434 -- | Lenses | --
435 ----------------
436
437
438 makeLenses ''PhyloParam
439 makeLenses ''Software
440 --
441 makeLenses ''Phylo
442 makeLenses ''PhyloRoots
443 makeLenses ''PhyloGroup
444 makeLenses ''PhyloLevel
445 makeLenses ''PhyloPeriod
446 makeLenses ''PhyloFis
447 --
448 makeLenses ''Proximity
449 makeLenses ''Cluster
450 makeLenses ''Filter
451 --
452 makeLenses ''PhyloQueryBuild
453 makeLenses ''PhyloQueryView
454 --
455 makeLenses ''PhyloView
456 makeLenses ''PhyloBranch
457 makeLenses ''PhyloNode
458 makeLenses ''PhyloEdge
459
460
461 ------------------------
462 -- | JSON instances | --
463 ------------------------
464
465
466 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
467 $(deriveJSON (unPrefix "_phylo_roots" ) ''PhyloRoots )
468 $(deriveJSON defaultOptions ''Tree )
469 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
470 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
471 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
472 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
473 --
474 $(deriveJSON (unPrefix "_software_" ) ''Software )
475 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
476 --
477 $(deriveJSON defaultOptions ''Filter )
478 $(deriveJSON defaultOptions ''Metric )
479 $(deriveJSON defaultOptions ''Cluster )
480 $(deriveJSON defaultOptions ''Proximity )
481 --
482 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
483 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
484 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
485 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
486 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
487 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
488 --
489 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
490 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
491 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
492 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
493 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
494
495 $(deriveJSON defaultOptions ''Filiation )
496 $(deriveJSON defaultOptions ''EdgeType )
497
498
499 ----------------------------
500 -- | TODO XML instances | --
501 ----------------------------
502