]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
phylo from wos in progress
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 --------------------
48 -- | PhyloParam | --
49 --------------------
50
51
52 -- | Global parameters of a Phylo
53 data PhyloParam =
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
58
59
60 -- | Software parameters
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
65
66
67 ---------------
68 -- | Phylo | --
69 ---------------
70
71
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
76 data Phylo =
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
81 }
82 deriving (Generic, Show, Eq)
83
84
85 -- | The foundations of a phylomemy created from a given TermList
86 data PhyloFoundations =
87 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
88 , _phylo_foundationsTermsList :: TermList
89 } deriving (Generic, Show, Eq)
90
91
92 -- | Date : a simple Integer
93 type Date = Int
94
95 -- | UTCTime in seconds since UNIX epoch
96 -- type Start = POSIXTime
97 -- type End = POSIXTime
98 type Start = Date
99 type End = Date
100
101
102 ---------------------
103 -- | PhyloPeriod | --
104 ---------------------
105
106
107 -- | PhyloStep : steps of phylomemy on temporal axis
108 -- Period: tuple (start date, end date) of the step of the phylomemy
109 -- Levels: levels of granularity
110 data PhyloPeriod =
111 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
112 , _phylo_periodLevels :: [PhyloLevel]
113 }
114 deriving (Generic, Show, Eq)
115
116
117 --------------------
118 -- | PhyloLevel | --
119 --------------------
120
121
122 -- | PhyloLevel : levels of phylomemy on level axis
123 -- Levels description:
124 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
125 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
126 -- Level 1: First level of clustering
127 -- Level N: Nth level of clustering
128 data PhyloLevel =
129 PhyloLevel { _phylo_levelId :: PhyloLevelId
130 , _phylo_levelGroups :: [PhyloGroup]
131 }
132 deriving (Generic, Show, Eq)
133
134
135 --------------------
136 -- | PhyloGroup | --
137 --------------------
138
139
140 -- | PhyloGroup : group of ngrams at each level and step
141 -- Label : maybe has a label as text
142 -- Ngrams: set of terms that build the group
143 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
144 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
145 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
146 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
147 data PhyloGroup =
148 PhyloGroup { _phylo_groupId :: PhyloGroupId
149 , _phylo_groupLabel :: Text
150 , _phylo_groupNgrams :: [Int]
151 , _phylo_groupMeta :: Map Text Double
152 , _phylo_groupCooc :: Map (Int, Int) Double
153 , _phylo_groupBranchId :: Maybe PhyloBranchId
154
155 , _phylo_groupPeriodParents :: [Pointer]
156 , _phylo_groupPeriodChilds :: [Pointer]
157
158 , _phylo_groupLevelParents :: [Pointer]
159 , _phylo_groupLevelChilds :: [Pointer]
160 }
161 deriving (Generic, Show, Eq, Ord)
162
163
164 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
165 type Level = Int
166 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
167 type Index = Int
168
169
170 type PhyloPeriodId = (Start, End)
171 type PhyloLevelId = (PhyloPeriodId, Level)
172 type PhyloGroupId = (PhyloLevelId, Index)
173 type PhyloBranchId = (Level, Index)
174
175
176 -- | Weight : A generic mesure that can be associated with an Id
177 type Weight = Double
178 -- | Pointer : A weighted linked with a given PhyloGroup
179 type Pointer = (PhyloGroupId, Weight)
180 -- | Ngrams : a contiguous sequence of n terms
181 type Ngrams = Text
182
183
184 --------------------
185 -- | Aggregates | --
186 --------------------
187
188
189 -- | Document : a piece of Text linked to a Date
190 data Document = Document
191 { date :: Date
192 , text :: [Ngrams]
193 } deriving (Show,Generic)
194
195 -- | Clique : Set of ngrams cooccurring in the same Document
196 type Clique = Set Ngrams
197 -- | Support : Number of Documents where a Clique occurs
198 type Support = Int
199 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
200 data PhyloFis = PhyloFis
201 { _phyloFis_clique :: Clique
202 , _phyloFis_support :: Support
203 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
204 } deriving (Show)
205
206 -- | A list of clustered PhyloGroup
207 type PhyloCluster = [PhyloGroup]
208
209
210 -- | A List of PhyloGroup in a Graph
211 type GroupNodes = [PhyloGroup]
212 -- | A List of weighted links between some PhyloGroups in a Graph
213 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
214 -- | The association as a Graph between a list of Nodes and a list of Edges
215 type GroupGraph = (GroupNodes,GroupEdges)
216
217
218 ---------------
219 -- | Error | --
220 ---------------
221
222
223 data PhyloError = LevelDoesNotExist
224 | LevelUnassigned
225 deriving (Show)
226
227
228 -----------------
229 -- | Cluster | --
230 -----------------
231
232
233 -- | Cluster constructors
234 data Cluster = Fis FisParams
235 | RelatedComponents RCParams
236 | Louvain LouvainParams
237 deriving (Generic, Show, Eq, Read)
238
239 -- | Parameters for Fis clustering
240 data FisParams = FisParams
241 { _fis_keepMinorFis :: Bool
242 , _fis_minSupport :: Support
243 , _fis_minSize :: Int
244 } deriving (Generic, Show, Eq, Read)
245
246 -- | Parameters for RelatedComponents clustering
247 data RCParams = RCParams
248 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
249
250 -- | Parameters for Louvain clustering
251 data LouvainParams = LouvainParams
252 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
253
254
255 -------------------
256 -- | Proximity | --
257 -------------------
258
259
260 -- | Proximity constructors
261 data Proximity = WeightedLogJaccard WLJParams
262 | Hamming HammingParams
263 | Filiation
264 deriving (Generic, Show, Eq, Read)
265
266 -- | Parameters for WeightedLogJaccard proximity
267 data WLJParams = WLJParams
268 { _wlj_threshold :: Double
269 , _wlj_sensibility :: Double
270 } deriving (Generic, Show, Eq, Read)
271
272 -- | Parameters for Hamming proximity
273 data HammingParams = HammingParams
274 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
275
276
277 ----------------
278 -- | Filter | --
279 ----------------
280
281
282 -- | Filter constructors
283 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
284
285 -- | Parameters for SmallBranch filter
286 data SBParams = SBParams
287 { _sb_periodsInf :: Int
288 , _sb_periodsSup :: Int
289 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
290
291
292 ----------------
293 -- | Metric | --
294 ----------------
295
296
297 -- | Metric constructors
298 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
299
300
301 ----------------
302 -- | Tagger | --
303 ----------------
304
305
306 -- | Tagger constructors
307 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
308
309
310 --------------
311 -- | Sort | --
312 --------------
313
314
315 -- | Sort constructors
316 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
317 data Order = Asc | Desc deriving (Generic, Show, Read)
318
319
320 --------------------
321 -- | PhyloQuery | --
322 --------------------
323
324
325 -- | A Phyloquery describes a phylomemic reconstruction
326 data PhyloQueryBuild = PhyloQueryBuild
327 { _q_phyloTitle :: Text
328 , _q_phyloDesc :: Text
329
330 -- Grain and Steps for the PhyloPeriods
331 , _q_periodGrain :: Int
332 , _q_periodSteps :: Int
333
334 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
335 , _q_contextualUnit :: Cluster
336 , _q_contextualUnitMetrics :: [Metric]
337 , _q_contextualUnitFilters :: [Filter]
338
339 -- Inter-temporal matching method of the Phylo
340 , _q_interTemporalMatching :: Proximity
341
342 -- Last level of reconstruction
343 , _q_nthLevel :: Level
344 -- Clustering method used from level 1 to nthLevel
345 , _q_nthCluster :: Cluster
346 } deriving (Generic, Show, Eq)
347
348 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
349 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
350 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
351
352 -------------------
353 -- | PhyloView | --
354 -------------------
355
356
357 -- | A PhyloView is the output type of a Phylo
358 data PhyloView = PhyloView
359 { _pv_param :: PhyloParam
360 , _pv_title :: Text
361 , _pv_description :: Text
362 , _pv_filiation :: Filiation
363 , _pv_level :: Level
364 , _pv_periods :: [PhyloPeriodId]
365 , _pv_metrics :: Map Text [Double]
366 , _pv_branches :: [PhyloBranch]
367 , _pv_nodes :: [PhyloNode]
368 , _pv_edges :: [PhyloEdge]
369 } deriving (Generic, Show)
370
371 -- | A phyloview is made of PhyloBranches, edges and nodes
372 data PhyloBranch = PhyloBranch
373 { _pb_id :: PhyloBranchId
374 , _pb_peak :: Text
375 , _pb_metrics :: Map Text [Double]
376 } deriving (Generic, Show)
377
378 data PhyloEdge = PhyloEdge
379 { _pe_source :: PhyloGroupId
380 , _pe_target :: PhyloGroupId
381 , _pe_type :: EdgeType
382 , _pe_weight :: Weight
383 } deriving (Generic, Show)
384
385 data PhyloNode = PhyloNode
386 { _pn_id :: PhyloGroupId
387 , _pn_bid :: Maybe PhyloBranchId
388 , _pn_label :: Text
389 , _pn_idx :: [Int]
390 , _pn_ngrams :: Maybe [Ngrams]
391 , _pn_metrics :: Map Text [Double]
392 , _pn_parents :: Maybe [PhyloGroupId]
393 , _pn_childs :: [PhyloNode]
394 } deriving (Generic, Show)
395
396 ------------------------
397 -- | PhyloQueryView | --
398 ------------------------
399
400
401 data ExportMode = Json | Dot | Svg
402 deriving (Generic, Show, Read)
403 data DisplayMode = Flat | Nested
404 deriving (Generic, Show, Read)
405
406 -- | A PhyloQueryView describes a Phylo as an output view
407 data PhyloQueryView = PhyloQueryView
408 { _qv_lvl :: Level
409
410 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
411 , _qv_filiation :: Filiation
412
413 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
414 , _qv_levelChilds :: Bool
415 , _qv_levelChildsDepth :: Level
416
417 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
418 -- Firstly the metrics, then the filters and the taggers
419 , _qv_metrics :: [Metric]
420 , _qv_filters :: [Filter]
421 , _qv_taggers :: [Tagger]
422
423 -- An asc or desc sort to apply to the PhyloGraph
424 , _qv_sort :: Maybe (Sort,Order)
425
426 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
427 , _qv_export :: ExportMode
428 , _qv_display :: DisplayMode
429 , _qv_verbose :: Bool
430 }
431
432
433 ----------------
434 -- | Lenses | --
435 ----------------
436
437
438 makeLenses ''PhyloParam
439 makeLenses ''Software
440 --
441 makeLenses ''Phylo
442 makeLenses ''PhyloFoundations
443 makeLenses ''PhyloGroup
444 makeLenses ''PhyloLevel
445 makeLenses ''PhyloPeriod
446 makeLenses ''PhyloFis
447 --
448 makeLenses ''Proximity
449 makeLenses ''Cluster
450 makeLenses ''Filter
451 --
452 makeLenses ''PhyloQueryBuild
453 makeLenses ''PhyloQueryView
454 --
455 makeLenses ''PhyloView
456 makeLenses ''PhyloBranch
457 makeLenses ''PhyloNode
458 makeLenses ''PhyloEdge
459
460
461 ------------------------
462 -- | JSON instances | --
463 ------------------------
464
465
466 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
467 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
468 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
469 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
470 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
471 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
472 --
473 $(deriveJSON (unPrefix "_software_" ) ''Software )
474 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
475 --
476 $(deriveJSON defaultOptions ''Filter )
477 $(deriveJSON defaultOptions ''Metric )
478 $(deriveJSON defaultOptions ''Cluster )
479 $(deriveJSON defaultOptions ''Proximity )
480 --
481 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
482 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
483 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
484 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
485 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
486 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
487 --
488 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
489 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
490 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
491 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
492 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
493
494 $(deriveJSON defaultOptions ''Filiation )
495 $(deriveJSON defaultOptions ''EdgeType )
496
497
498 ----------------------------
499 -- | TODO XML instances | --
500 ----------------------------
501