]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
[BIN/PHYLO] first draft, needs phylo connection.
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Prelude
45
46 --------------------
47 -- | PhyloParam | --
48 --------------------
49
50
51 -- | Global parameters of a Phylo
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQueryBuild
56 } deriving (Generic, Show, Eq)
57
58
59 -- | Software parameters
60 data Software =
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show, Eq)
64
65
66 ---------------
67 -- | Phylo | --
68 ---------------
69
70
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
74 -- Periods : list of all the periods of a Phylo
75 data Phylo =
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 , _phylo_foundationsPeaks :: PhyloPeaks
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
81 }
82 deriving (Generic, Show, Eq)
83
84 -- | The PhyloPeaks describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
85 -- PeaksLabels are the root labels of each Ngrams trees
86 data PhyloPeaks =
87 PhyloPeaks { _phylo_peaksLabels :: Vector Ngrams
88 , _phylo_peaksForest :: [Tree Ngrams]
89 }
90 deriving (Generic, Show, Eq)
91
92 -- | A Tree of Ngrams where each node is a label
93 data Tree a = Empty | Node a [Tree a] deriving (Generic, Show, Eq)
94
95
96 -- | Date : a simple Integer
97 type Date = Int
98
99 -- | UTCTime in seconds since UNIX epoch
100 -- type Start = POSIXTime
101 -- type End = POSIXTime
102 type Start = Date
103 type End = Date
104
105
106 ---------------------
107 -- | PhyloPeriod | --
108 ---------------------
109
110
111 -- | PhyloStep : steps of phylomemy on temporal axis
112 -- Period: tuple (start date, end date) of the step of the phylomemy
113 -- Levels: levels of granularity
114 data PhyloPeriod =
115 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
116 , _phylo_periodLevels :: [PhyloLevel]
117 }
118 deriving (Generic, Show, Eq)
119
120
121 --------------------
122 -- | PhyloLevel | --
123 --------------------
124
125
126 -- | PhyloLevel : levels of phylomemy on level axis
127 -- Levels description:
128 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
129 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
130 -- Level 1: First level of clustering
131 -- Level N: Nth level of clustering
132 data PhyloLevel =
133 PhyloLevel { _phylo_levelId :: PhyloLevelId
134 , _phylo_levelGroups :: [PhyloGroup]
135 }
136 deriving (Generic, Show, Eq)
137
138
139 --------------------
140 -- | PhyloGroup | --
141 --------------------
142
143
144 -- | PhyloGroup : group of ngrams at each level and step
145 -- Label : maybe has a label as text
146 -- Ngrams: set of terms that build the group
147 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
148 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
149 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
150 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
151 data PhyloGroup =
152 PhyloGroup { _phylo_groupId :: PhyloGroupId
153 , _phylo_groupLabel :: Text
154 , _phylo_groupNgrams :: [Int]
155 , _phylo_groupMeta :: Map Text Double
156 , _phylo_groupCooc :: Map (Int, Int) Double
157 , _phylo_groupBranchId :: Maybe PhyloBranchId
158
159 , _phylo_groupPeriodParents :: [Pointer]
160 , _phylo_groupPeriodChilds :: [Pointer]
161
162 , _phylo_groupLevelParents :: [Pointer]
163 , _phylo_groupLevelChilds :: [Pointer]
164 }
165 deriving (Generic, Show, Eq, Ord)
166
167
168 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
169 type Level = Int
170 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
171 type Index = Int
172
173
174 type PhyloPeriodId = (Start, End)
175 type PhyloLevelId = (PhyloPeriodId, Level)
176 type PhyloGroupId = (PhyloLevelId, Index)
177 type PhyloBranchId = (Level, Index)
178
179
180 -- | Weight : A generic mesure that can be associated with an Id
181 type Weight = Double
182 -- | Pointer : A weighted linked with a given PhyloGroup
183 type Pointer = (PhyloGroupId, Weight)
184 -- | Ngrams : a contiguous sequence of n terms
185 type Ngrams = Text
186
187
188 --------------------
189 -- | Aggregates | --
190 --------------------
191
192
193 -- | Document : a piece of Text linked to a Date
194 data Document = Document
195 { date :: Date
196 , text :: [Ngrams]
197 } deriving (Show)
198
199 -- | Clique : Set of ngrams cooccurring in the same Document
200 type Clique = Set Ngrams
201 -- | Support : Number of Documents where a Clique occurs
202 type Support = Int
203 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
204 data PhyloFis = PhyloFis
205 { _phyloFis_clique :: Clique
206 , _phyloFis_support :: Support
207 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
208 } deriving (Show)
209
210 -- | A list of clustered PhyloGroup
211 type PhyloCluster = [PhyloGroup]
212
213
214 -- | A List of PhyloGroup in a Graph
215 type GroupNodes = [PhyloGroup]
216 -- | A List of weighted links between some PhyloGroups in a Graph
217 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
218 -- | The association as a Graph between a list of Nodes and a list of Edges
219 type GroupGraph = (GroupNodes,GroupEdges)
220
221
222 ---------------
223 -- | Error | --
224 ---------------
225
226
227 data PhyloError = LevelDoesNotExist
228 | LevelUnassigned
229 deriving (Show)
230
231
232 -----------------
233 -- | Cluster | --
234 -----------------
235
236
237 -- | Cluster constructors
238 data Cluster = Fis FisParams
239 | RelatedComponents RCParams
240 | Louvain LouvainParams
241 deriving (Generic, Show, Eq, Read)
242
243 -- | Parameters for Fis clustering
244 data FisParams = FisParams
245 { _fis_keepMinorFis :: Bool
246 , _fis_minSupport :: Support
247 } deriving (Generic, Show, Eq, Read)
248
249 -- | Parameters for RelatedComponents clustering
250 data RCParams = RCParams
251 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
252
253 -- | Parameters for Louvain clustering
254 data LouvainParams = LouvainParams
255 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
256
257
258 -------------------
259 -- | Proximity | --
260 -------------------
261
262
263 -- | Proximity constructors
264 data Proximity = WeightedLogJaccard WLJParams
265 | Hamming HammingParams
266 | Filiation
267 deriving (Generic, Show, Eq, Read)
268
269 -- | Parameters for WeightedLogJaccard proximity
270 data WLJParams = WLJParams
271 { _wlj_threshold :: Double
272 , _wlj_sensibility :: Double
273 } deriving (Generic, Show, Eq, Read)
274
275 -- | Parameters for Hamming proximity
276 data HammingParams = HammingParams
277 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
278
279
280 ----------------
281 -- | Filter | --
282 ----------------
283
284
285 -- | Filter constructors
286 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
287
288 -- | Parameters for SmallBranch filter
289 data SBParams = SBParams
290 { _sb_periodsInf :: Int
291 , _sb_periodsSup :: Int
292 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
293
294
295 ----------------
296 -- | Metric | --
297 ----------------
298
299
300 -- | Metric constructors
301 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
302
303
304 ----------------
305 -- | Tagger | --
306 ----------------
307
308
309 -- | Tagger constructors
310 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics
311 deriving (Generic, Show, Read)
312
313
314 --------------
315 -- | Sort | --
316 --------------
317
318
319 -- | Sort constructors
320 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
321 data Order = Asc | Desc deriving (Generic, Show, Read)
322
323
324 --------------------
325 -- | PhyloQuery | --
326 --------------------
327
328
329 -- | A Phyloquery describes a phylomemic reconstruction
330 data PhyloQueryBuild = PhyloQueryBuild
331 { _q_phyloTitle :: Text
332 , _q_phyloDesc :: Text
333
334 -- Grain and Steps for the PhyloPeriods
335 , _q_periodGrain :: Int
336 , _q_periodSteps :: Int
337
338 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
339 , _q_contextualUnit :: Cluster
340 , _q_contextualUnitMetrics :: [Metric]
341 , _q_contextualUnitFilters :: [Filter]
342
343 -- Inter-temporal matching method of the Phylo
344 , _q_interTemporalMatching :: Proximity
345
346 -- Last level of reconstruction
347 , _q_nthLevel :: Level
348 -- Clustering method used from level 1 to nthLevel
349 , _q_nthCluster :: Cluster
350 } deriving (Generic, Show, Eq)
351
352 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
353 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show)
354 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
355
356 -------------------
357 -- | PhyloView | --
358 -------------------
359
360
361 -- | A PhyloView is the output type of a Phylo
362 data PhyloView = PhyloView
363 { _pv_param :: PhyloParam
364 , _pv_title :: Text
365 , _pv_description :: Text
366 , _pv_filiation :: Filiation
367 , _pv_level :: Level
368 , _pv_metrics :: Map Text [Double]
369 , _pv_branches :: [PhyloBranch]
370 , _pv_nodes :: [PhyloNode]
371 , _pv_edges :: [PhyloEdge]
372 } deriving (Generic, Show)
373
374 -- | A phyloview is made of PhyloBranches, edges and nodes
375 data PhyloBranch = PhyloBranch
376 { _pb_id :: PhyloBranchId
377 , _pb_label :: Text
378 , _pb_metrics :: Map Text [Double]
379 } deriving (Generic, Show)
380
381 data PhyloEdge = PhyloEdge
382 { _pe_source :: PhyloGroupId
383 , _pe_target :: PhyloGroupId
384 , _pe_type :: EdgeType
385 , _pe_weight :: Weight
386 } deriving (Generic, Show)
387
388 data PhyloNode = PhyloNode
389 { _pn_id :: PhyloGroupId
390 , _pn_bid :: Maybe PhyloBranchId
391 , _pn_label :: Text
392 , _pn_idx :: [Int]
393 , _pn_ngrams :: Maybe [Ngrams]
394 , _pn_metrics :: Map Text [Double]
395 , _pn_parents :: Maybe [PhyloGroupId]
396 , _pn_childs :: [PhyloNode]
397 } deriving (Generic, Show)
398
399 ------------------------
400 -- | PhyloQueryView | --
401 ------------------------
402
403
404 data ExportMode = Json | Dot | Svg
405 data DisplayMode = Flat | Nested
406 deriving (Generic, Show, Read)
407
408 -- | A PhyloQueryView describes a Phylo as an output view
409 data PhyloQueryView = PhyloQueryView
410 { _qv_lvl :: Level
411
412 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
413 , _qv_filiation :: Filiation
414
415 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
416 , _qv_levelChilds :: Bool
417 , _qv_levelChildsDepth :: Level
418
419 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
420 -- Firstly the metrics, then the filters and the taggers
421 , _qv_metrics :: [Metric]
422 , _qv_filters :: [Filter]
423 , _qv_taggers :: [Tagger]
424
425 -- An asc or desc sort to apply to the PhyloGraph
426 , _qv_sort :: Maybe (Sort,Order)
427
428 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
429 , _qv_export :: ExportMode
430 , _qv_display :: DisplayMode
431 , _qv_verbose :: Bool
432 }
433
434
435 ----------------
436 -- | Lenses | --
437 ----------------
438
439
440 makeLenses ''PhyloParam
441 makeLenses ''Software
442 --
443 makeLenses ''Phylo
444 makeLenses ''PhyloPeaks
445 makeLenses ''PhyloGroup
446 makeLenses ''PhyloLevel
447 makeLenses ''PhyloPeriod
448 makeLenses ''PhyloFis
449 --
450 makeLenses ''Proximity
451 makeLenses ''Cluster
452 makeLenses ''Filter
453 --
454 makeLenses ''PhyloQueryBuild
455 makeLenses ''PhyloQueryView
456 --
457 makeLenses ''PhyloView
458 makeLenses ''PhyloBranch
459 makeLenses ''PhyloNode
460 makeLenses ''PhyloEdge
461
462
463 ------------------------
464 -- | JSON instances | --
465 ------------------------
466
467
468 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
469 $(deriveJSON (unPrefix "_phylo_peaks" ) ''PhyloPeaks )
470 $(deriveJSON defaultOptions ''Tree )
471 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
472 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
473 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
474 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
475 --
476 $(deriveJSON (unPrefix "_software_" ) ''Software )
477 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
478 --
479 $(deriveJSON defaultOptions ''Filter )
480 $(deriveJSON defaultOptions ''Metric )
481 $(deriveJSON defaultOptions ''Cluster )
482 $(deriveJSON defaultOptions ''Proximity )
483 --
484 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
485 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
486 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
487 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
488 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
489 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
490 --
491 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
492 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
493 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
494 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
495 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
496
497 $(deriveJSON defaultOptions ''Filiation )
498 $(deriveJSON defaultOptions ''EdgeType )
499
500
501 ----------------------------
502 -- | TODO XML instances | --
503 ----------------------------
504