]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
just finalized the dot export
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 --import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 --import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 --------------------
46 -- | PhyloParam | --
47 --------------------
48
49
50 -- | Global parameters of a Phylo
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_query :: PhyloQueryBuild
55 } deriving (Generic, Show, Eq)
56
57
58 -- | Software parameters
59 data Software =
60 Software { _software_name :: Text
61 , _software_version :: Text
62 } deriving (Generic, Show, Eq)
63
64
65 ---------------
66 -- | Phylo | --
67 ---------------
68
69
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
74 data Phylo =
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: Vector Ngrams
77 , _phylo_foundationsPeaks :: PhyloPeaks
78 , _phylo_periods :: [PhyloPeriod]
79 , _phylo_param :: PhyloParam
80 }
81 deriving (Generic, Show, Eq)
82
83 -- | The PhyloPeaks describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
84 -- PeaksLabels are the root labels of each Ngrams trees
85 data PhyloPeaks =
86 PhyloPeaks { _phylo_peaksLabels :: Vector Ngrams
87 , _phylo_peaksForest :: [Tree Ngrams]
88 }
89 deriving (Generic, Show, Eq)
90
91 -- | A Tree of Ngrams where each node is a label
92 data Tree a = Empty | Node a [Tree a] deriving (Show, Eq)
93
94
95 -- | Date : a simple Integer
96 type Date = Int
97
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
101 type Start = Date
102 type End = Date
103
104
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
108
109
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
113 data PhyloPeriod =
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
116 }
117 deriving (Generic, Show, Eq)
118
119
120 --------------------
121 -- | PhyloLevel | --
122 --------------------
123
124
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
131 data PhyloLevel =
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
134 }
135 deriving (Generic, Show, Eq)
136
137
138 --------------------
139 -- | PhyloGroup | --
140 --------------------
141
142
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 data PhyloGroup =
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupCooc :: Map (Int, Int) Double
156 , _phylo_groupBranchId :: Maybe PhyloBranchId
157
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
160
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
163 }
164 deriving (Generic, Show, Eq, Ord)
165
166
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 type Level = Int
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
170 type Index = Int
171
172
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
177
178
179 -- | Weight : A generic mesure that can be associated with an Id
180 type Weight = Double
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
184 type Ngrams = Text
185
186
187 --------------------
188 -- | Aggregates | --
189 --------------------
190
191
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
194 { date :: Date
195 , text :: [Ngrams]
196 } deriving (Show)
197
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
201 type Support = Int
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
207 } deriving (Show)
208
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
211
212
213 -- | A List of PhyloGroup in a Graph
214 type GroupNodes = [PhyloGroup]
215 -- | A List of weighted links between some PhyloGroups in a Graph
216 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = (GroupNodes,GroupEdges)
219
220
221 ---------------
222 -- | Error | --
223 ---------------
224
225
226 data PhyloError = LevelDoesNotExist
227 | LevelUnassigned
228 deriving (Show)
229
230
231 -----------------
232 -- | Cluster | --
233 -----------------
234
235
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq)
241
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 } deriving (Generic, Show, Eq)
247
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq)
251
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq)
255
256
257 -------------------
258 -- | Proximity | --
259 -------------------
260
261
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
265 | Filiation
266 deriving (Generic, Show, Eq)
267
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq)
273
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq)
277
278
279 ----------------
280 -- | Filter | --
281 ----------------
282
283
284 -- | Filter constructors
285 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
286
287 -- | Parameters for SmallBranch filter
288 data SBParams = SBParams
289 { _sb_periodsInf :: Int
290 , _sb_periodsSup :: Int
291 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
292
293
294 ----------------
295 -- | Metric | --
296 ----------------
297
298
299 -- | Metric constructors
300 data Metric = BranchAge deriving (Generic, Show, Eq)
301
302
303 ----------------
304 -- | Tagger | --
305 ----------------
306
307
308 -- | Tagger constructors
309 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
310
311
312 --------------
313 -- | Sort | --
314 --------------
315
316
317 -- | Sort constructors
318 data Sort = ByBranchAge deriving (Generic, Show)
319 data Order = Asc | Desc deriving (Generic, Show)
320
321
322 --------------------
323 -- | PhyloQuery | --
324 --------------------
325
326
327 -- | A Phyloquery describes a phylomemic reconstruction
328 data PhyloQueryBuild = PhyloQueryBuild
329 { _q_phyloTitle :: Text
330 , _q_phyloDesc :: Text
331
332 -- Grain and Steps for the PhyloPeriods
333 , _q_periodGrain :: Int
334 , _q_periodSteps :: Int
335
336 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
337 , _q_contextualUnit :: Cluster
338 , _q_contextualUnitMetrics :: [Metric]
339 , _q_contextualUnitFilters :: [Filter]
340
341 -- Inter-temporal matching method of the Phylo
342 , _q_interTemporalMatching :: Proximity
343
344 -- Last level of reconstruction
345 , _q_nthLevel :: Level
346 -- Clustering method used from level 1 to nthLevel
347 , _q_nthCluster :: Cluster
348 } deriving (Generic, Show, Eq)
349
350 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
351 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show)
352 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
353
354 -------------------
355 -- | PhyloView | --
356 -------------------
357
358
359 -- | A PhyloView is the output type of a Phylo
360 data PhyloView = PhyloView
361 { _pv_param :: PhyloParam
362 , _pv_title :: Text
363 , _pv_description :: Text
364 , _pv_filiation :: Filiation
365 , _pv_level :: Level
366 , _pv_metrics :: Map Text [Double]
367 , _pv_branches :: [PhyloBranch]
368 , _pv_nodes :: [PhyloNode]
369 , _pv_edges :: [PhyloEdge]
370 } deriving (Generic, Show)
371
372 -- | A phyloview is made of PhyloBranches, edges and nodes
373 data PhyloBranch = PhyloBranch
374 { _pb_id :: PhyloBranchId
375 , _pb_label :: Text
376 , _pb_metrics :: Map Text [Double]
377 } deriving (Generic, Show)
378
379 data PhyloEdge = PhyloEdge
380 { _pe_source :: PhyloGroupId
381 , _pe_target :: PhyloGroupId
382 , _pe_type :: EdgeType
383 , _pe_weight :: Weight
384 } deriving (Generic, Show)
385
386 data PhyloNode = PhyloNode
387 { _pn_id :: PhyloGroupId
388 , _pn_bid :: Maybe PhyloBranchId
389 , _pn_label :: Text
390 , _pn_idx :: [Int]
391 , _pn_ngrams :: Maybe [Ngrams]
392 , _pn_metrics :: Map Text [Double]
393 , _pn_parents :: Maybe [PhyloGroupId]
394 , _pn_childs :: [PhyloNode]
395 } deriving (Generic, Show)
396
397 ------------------------
398 -- | PhyloQueryView | --
399 ------------------------
400
401
402 data ExportMode = Json | Dot | Svg
403 data DisplayMode = Flat | Nested
404
405 -- | A PhyloQueryView describes a Phylo as an output view
406 data PhyloQueryView = PhyloQueryView
407 { _qv_lvl :: Level
408
409 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
410 , _qv_filiation :: Filiation
411
412 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
413 , _qv_levelChilds :: Bool
414 , _qv_levelChildsDepth :: Level
415
416 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
417 -- Firstly the metrics, then the filters and the taggers
418 , _qv_metrics :: [Metric]
419 , _qv_filters :: [Filter]
420 , _qv_taggers :: [Tagger]
421
422 -- An asc or desc sort to apply to the PhyloGraph
423 , _qv_sort :: Maybe (Sort,Order)
424
425 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
426 , _qv_export :: ExportMode
427 , _qv_display :: DisplayMode
428 , _qv_verbose :: Bool
429 }
430
431
432 ----------------
433 -- | Lenses | --
434 ----------------
435
436
437 makeLenses ''PhyloParam
438 makeLenses ''Software
439 --
440 makeLenses ''Phylo
441 makeLenses ''PhyloPeaks
442 makeLenses ''PhyloGroup
443 makeLenses ''PhyloLevel
444 makeLenses ''PhyloPeriod
445 makeLenses ''PhyloFis
446 --
447 makeLenses ''Proximity
448 makeLenses ''Cluster
449 makeLenses ''Filter
450 --
451 makeLenses ''PhyloQueryBuild
452 makeLenses ''PhyloQueryView
453 --
454 makeLenses ''PhyloView
455 makeLenses ''PhyloBranch
456 makeLenses ''PhyloNode
457 makeLenses ''PhyloEdge
458
459
460 ------------------------
461 -- | JSON instances | --
462 ------------------------
463
464
465 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
466 $(deriveJSON (unPrefix "_phylo_peaks" ) ''PhyloPeaks )
467 $(deriveJSON defaultOptions ''Tree )
468 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
469 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
470 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
471 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
472 --
473 $(deriveJSON (unPrefix "_software_" ) ''Software )
474 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
475 --
476 $(deriveJSON defaultOptions ''Filter )
477 $(deriveJSON defaultOptions ''Metric )
478 $(deriveJSON defaultOptions ''Cluster )
479 $(deriveJSON defaultOptions ''Proximity )
480 --
481 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
482 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
483 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
484 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
485 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
486 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
487 --
488 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
489 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
490 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
491 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
492 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
493
494 $(deriveJSON defaultOptions ''Filiation )
495 $(deriveJSON defaultOptions ''EdgeType )
496
497
498 ----------------------------
499 -- | TODO XML instances | --
500 ----------------------------
501