]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Working on dot export
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 --import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 --import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 --------------------
46 -- | PhyloParam | --
47 --------------------
48
49
50 -- | Global parameters of a Phylo
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_query :: PhyloQueryBuild
55 } deriving (Generic, Show, Eq)
56
57
58 -- | Software parameters
59 data Software =
60 Software { _software_name :: Text
61 , _software_version :: Text
62 } deriving (Generic, Show, Eq)
63
64
65 ---------------
66 -- | Phylo | --
67 ---------------
68
69
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
74 data Phylo =
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: Vector Ngrams
77 , _phylo_foundationsPeaks :: PhyloPeaks
78 , _phylo_periods :: [PhyloPeriod]
79 , _phylo_param :: PhyloParam
80 }
81 deriving (Generic, Show, Eq)
82
83 -- | The PhyloPeaks describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
84 -- PeaksLabels are the root labels of each Ngrams trees
85 data PhyloPeaks =
86 PhyloPeaks { _phylo_peaksLabels :: Vector Ngrams
87 , _phylo_peaksForest :: [Tree Ngrams]
88 }
89 deriving (Generic, Show, Eq)
90
91 -- | A Tree of Ngrams where each node is a label
92 data Tree a = Empty | Node a [Tree a] deriving (Show, Eq)
93
94
95 -- | Date : a simple Integer
96 type Date = Int
97
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
101 type Start = Date
102 type End = Date
103
104
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
108
109
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
113 data PhyloPeriod =
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
116 }
117 deriving (Generic, Show, Eq)
118
119
120 --------------------
121 -- | PhyloLevel | --
122 --------------------
123
124
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
131 data PhyloLevel =
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
134 }
135 deriving (Generic, Show, Eq)
136
137
138 --------------------
139 -- | PhyloGroup | --
140 --------------------
141
142
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 data PhyloGroup =
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupCooc :: Map (Int, Int) Double
156 , _phylo_groupBranchId :: Maybe PhyloBranchId
157
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
160
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
163 }
164 deriving (Generic, Show, Eq, Ord)
165
166
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 type Level = Int
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
170 type Index = Int
171
172
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
177
178
179 -- | Weight : A generic mesure that can be associated with an Id
180 type Weight = Double
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
184 type Ngrams = Text
185
186
187 --------------------
188 -- | Aggregates | --
189 --------------------
190
191
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
194 { date :: Date
195 , text :: [Ngrams]
196 } deriving (Show)
197
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
201 type Support = Int
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
207 } deriving (Show)
208
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
211
212
213 -- | A List of PhyloGroup in a Graph
214 type GroupNodes = [PhyloGroup]
215 -- | A List of weighted links between some PhyloGroups in a Graph
216 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = (GroupNodes,GroupEdges)
219
220
221 ---------------
222 -- | Error | --
223 ---------------
224
225
226 data PhyloError = LevelDoesNotExist
227 | LevelUnassigned
228 deriving (Show)
229
230
231 -----------------
232 -- | Cluster | --
233 -----------------
234
235
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq)
241
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 } deriving (Generic, Show, Eq)
247
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq)
251
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq)
255
256
257 -------------------
258 -- | Proximity | --
259 -------------------
260
261
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
265 | Filiation
266 deriving (Generic, Show, Eq)
267
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq)
273
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq)
277
278
279 ----------------
280 -- | Filter | --
281 ----------------
282
283
284 -- | Filter constructors
285 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
286
287 -- | Parameters for SmallBranch filter
288 data SBParams = SBParams
289 { _sb_periodsInf :: Int
290 , _sb_periodsSup :: Int
291 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
292
293
294 ----------------
295 -- | Metric | --
296 ----------------
297
298
299 -- | Metric constructors
300 data Metric = BranchAge deriving (Generic, Show, Eq)
301
302
303 ----------------
304 -- | Tagger | --
305 ----------------
306
307
308 -- | Tagger constructors
309 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
310
311
312 --------------
313 -- | Sort | --
314 --------------
315
316
317 -- | Sort constructors
318 data Sort = ByBranchAge deriving (Generic, Show)
319 data Order = Asc | Desc deriving (Generic, Show)
320
321
322 --------------------
323 -- | PhyloQuery | --
324 --------------------
325
326
327 -- | A Phyloquery describes a phylomemic reconstruction
328 data PhyloQueryBuild = PhyloQueryBuild
329 { _q_phyloTitle :: Text
330 , _q_phyloDesc :: Text
331
332 -- Grain and Steps for the PhyloPeriods
333 , _q_periodGrain :: Int
334 , _q_periodSteps :: Int
335
336 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
337 , _q_contextualUnit :: Cluster
338 , _q_contextualUnitMetrics :: [Metric]
339 , _q_contextualUnitFilters :: [Filter]
340
341 -- Inter-temporal matching method of the Phylo
342 , _q_interTemporalMatching :: Proximity
343
344 -- Last level of reconstruction
345 , _q_nthLevel :: Level
346 -- Clustering method used from level 1 to nthLevel
347 , _q_nthCluster :: Cluster
348 } deriving (Generic, Show, Eq)
349
350 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
351 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show)
352 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show)
353
354 -------------------
355 -- | PhyloView | --
356 -------------------
357
358
359 -- | A PhyloView is the output type of a Phylo
360 data PhyloView = PhyloView
361 { _pv_param :: PhyloParam
362 , _pv_title :: Text
363 , _pv_description :: Text
364 , _pv_filiation :: Filiation
365 , _pv_metrics :: Map Text [Double]
366 , _pv_branches :: [PhyloBranch]
367 , _pv_nodes :: [PhyloNode]
368 , _pv_edges :: [PhyloEdge]
369 } deriving (Generic, Show)
370
371 -- | A phyloview is made of PhyloBranches, edges and nodes
372 data PhyloBranch = PhyloBranch
373 { _pb_id :: PhyloBranchId
374 , _pb_label :: Text
375 , _pb_metrics :: Map Text [Double]
376 } deriving (Generic, Show)
377
378 data PhyloEdge = PhyloEdge
379 { _pe_source :: PhyloGroupId
380 , _pe_target :: PhyloGroupId
381 , _pe_type :: EdgeType
382 , _pe_weight :: Weight
383 } deriving (Generic, Show)
384
385 data PhyloNode = PhyloNode
386 { _pn_id :: PhyloGroupId
387 , _pn_bid :: Maybe PhyloBranchId
388 , _pn_label :: Text
389 , _pn_idx :: [Int]
390 , _pn_ngrams :: Maybe [Ngrams]
391 , _pn_metrics :: Map Text [Double]
392 , _pn_parents :: Maybe [PhyloGroupId]
393 , _pn_childs :: [PhyloNode]
394 } deriving (Generic, Show)
395
396 ------------------------
397 -- | PhyloQueryView | --
398 ------------------------
399
400
401 data DisplayMode = Flat | Nested
402
403 -- | A PhyloQueryView describes a Phylo as an output view
404 data PhyloQueryView = PhyloQueryView
405 { _qv_lvl :: Level
406
407 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
408 , _qv_filiation :: Filiation
409
410 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
411 , _qv_levelChilds :: Bool
412 , _qv_levelChildsDepth :: Level
413
414 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
415 -- Firstly the metrics, then the filters and the taggers
416 , _qv_metrics :: [Metric]
417 , _qv_filters :: [Filter]
418 , _qv_taggers :: [Tagger]
419
420 -- An asc or desc sort to apply to the PhyloGraph
421 , _qv_sort :: Maybe (Sort,Order)
422
423 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
424 , _qv_display :: DisplayMode
425 , _qv_verbose :: Bool
426 }
427
428
429 ----------------
430 -- | Lenses | --
431 ----------------
432
433
434 makeLenses ''PhyloParam
435 makeLenses ''Software
436 --
437 makeLenses ''Phylo
438 makeLenses ''PhyloPeaks
439 makeLenses ''PhyloGroup
440 makeLenses ''PhyloLevel
441 makeLenses ''PhyloPeriod
442 makeLenses ''PhyloFis
443 --
444 makeLenses ''Proximity
445 makeLenses ''Cluster
446 makeLenses ''Filter
447 --
448 makeLenses ''PhyloQueryBuild
449 makeLenses ''PhyloQueryView
450 --
451 makeLenses ''PhyloView
452 makeLenses ''PhyloBranch
453 makeLenses ''PhyloNode
454 makeLenses ''PhyloEdge
455
456
457 ------------------------
458 -- | JSON instances | --
459 ------------------------
460
461
462 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
463 $(deriveJSON (unPrefix "_phylo_peaks" ) ''PhyloPeaks )
464 $(deriveJSON defaultOptions ''Tree )
465 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
466 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
467 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
468 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
469 --
470 $(deriveJSON (unPrefix "_software_" ) ''Software )
471 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
472 --
473 $(deriveJSON defaultOptions ''Filter )
474 $(deriveJSON defaultOptions ''Metric )
475 $(deriveJSON defaultOptions ''Cluster )
476 $(deriveJSON defaultOptions ''Proximity )
477 --
478 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
479 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
480 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
481 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
482 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
483 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
484 --
485 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
486 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
487 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
488 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
489 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
490
491 $(deriveJSON defaultOptions ''Filiation )
492 $(deriveJSON defaultOptions ''EdgeType )
493
494
495 ----------------------------
496 -- | TODO XML instances | --
497 ----------------------------
498