]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
add rebranching to link distante branches
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 --------------------
48 -- | PhyloParam | --
49 --------------------
50
51
52 -- | Global parameters of a Phylo
53 data PhyloParam =
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
58
59
60 -- | Software parameters
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
65
66
67 ---------------
68 -- | Phylo | --
69 ---------------
70
71
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
76 data Phylo =
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
82 , _phylo_fis :: Map (Date,Date) [PhyloFis]
83 , _phylo_param :: PhyloParam
84 }
85 deriving (Generic, Show, Eq)
86
87
88 -- | The foundations of a phylomemy created from a given TermList
89 data PhyloFoundations =
90 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
91 , _phylo_foundationsTermsList :: TermList
92 } deriving (Generic, Show, Eq)
93
94
95 -- | Date : a simple Integer
96 type Date = Int
97
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
101 type Start = Date
102 type End = Date
103
104
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
108
109
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
113 data PhyloPeriod =
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
116 }
117 deriving (Generic, Show, Eq)
118
119
120 --------------------
121 -- | PhyloLevel | --
122 --------------------
123
124
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
131 data PhyloLevel =
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
134 }
135 deriving (Generic, Show, Eq)
136
137
138 --------------------
139 -- | PhyloGroup | --
140 --------------------
141
142
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 data PhyloGroup =
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupBranchId :: Maybe PhyloBranchId
156 , _phylo_groupCooc :: Map (Int,Int) Double
157
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
160
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
163 }
164 deriving (Generic, Show, Eq, Ord)
165
166
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 type Level = Int
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
170 type Index = Int
171
172
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
177
178
179 -- | Weight : A generic mesure that can be associated with an Id
180 type Weight = Double
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
184 type Ngrams = Text
185
186
187 --------------------
188 -- | Aggregates | --
189 --------------------
190
191
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
194 { date :: Date
195 , text :: [Ngrams]
196 } deriving (Show,Generic)
197
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
201 type Support = Int
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_period :: (Date,Date)
207 } deriving (Generic,Show,Eq)
208
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
211
212
213 -- | A PhyloGroup in a Graph
214 type GroupNode = PhyloGroup
215 -- | A weighted links between two PhyloGroups in a Graph
216 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = ([GroupNode],[GroupEdge])
219
220
221 ---------------
222 -- | Error | --
223 ---------------
224
225
226 data PhyloError = LevelDoesNotExist
227 | LevelUnassigned
228 deriving (Show)
229
230
231 -----------------
232 -- | Cluster | --
233 -----------------
234
235
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq, Read)
241
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 , _fis_minSize :: Int
247 } deriving (Generic, Show, Eq, Read)
248
249 -- | Parameters for RelatedComponents clustering
250 data RCParams = RCParams
251 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
252
253 -- | Parameters for Louvain clustering
254 data LouvainParams = LouvainParams
255 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
256
257
258 -------------------
259 -- | Proximity | --
260 -------------------
261
262
263 -- | Proximity constructors
264 data Proximity = WeightedLogJaccard WLJParams
265 | Hamming HammingParams
266 | Filiation
267 deriving (Generic, Show, Eq, Read)
268
269 -- | Parameters for WeightedLogJaccard proximity
270 data WLJParams = WLJParams
271 { _wlj_threshold :: Double
272 , _wlj_sensibility :: Double
273 } deriving (Generic, Show, Eq, Read)
274
275 -- | Parameters for Hamming proximity
276 data HammingParams = HammingParams
277 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
278
279
280 ----------------
281 -- | Filter | --
282 ----------------
283
284
285 -- | Filter constructors
286 data Filter = LonelyBranch LBParams
287 | SizeBranch SBParams
288 deriving (Generic, Show, Eq)
289
290 -- | Parameters for LonelyBranch filter
291 data LBParams = LBParams
292 { _lb_periodsInf :: Int
293 , _lb_periodsSup :: Int
294 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
295
296 -- | Parameters for SizeBranch filter
297 data SBParams = SBParams
298 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
299
300
301 ----------------
302 -- | Metric | --
303 ----------------
304
305
306 -- | Metric constructors
307 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
308
309
310 ----------------
311 -- | Tagger | --
312 ----------------
313
314
315 -- | Tagger constructors
316 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
317
318
319 --------------
320 -- | Sort | --
321 --------------
322
323
324 -- | Sort constructors
325 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
326 data Order = Asc | Desc deriving (Generic, Show, Read)
327
328
329 --------------------
330 -- | PhyloQuery | --
331 --------------------
332
333
334 -- | A Phyloquery describes a phylomemic reconstruction
335 data PhyloQueryBuild = PhyloQueryBuild
336 { _q_phyloTitle :: Text
337 , _q_phyloDesc :: Text
338
339 -- Grain and Steps for the PhyloPeriods
340 , _q_periodGrain :: Int
341 , _q_periodSteps :: Int
342
343 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
344 , _q_contextualUnit :: Cluster
345 , _q_contextualUnitMetrics :: [Metric]
346 , _q_contextualUnitFilters :: [Filter]
347
348 -- Inter-temporal matching method of the Phylo
349 , _q_interTemporalMatching :: Proximity
350 , _q_interTemporalMatchingFrame :: Int
351
352 -- Last level of reconstruction
353 , _q_nthLevel :: Level
354 -- Clustering method used from level 1 to nthLevel
355 , _q_nthCluster :: Cluster
356 } deriving (Generic, Show, Eq)
357
358 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
359 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
360 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
361
362 -------------------
363 -- | PhyloView | --
364 -------------------
365
366
367 -- | A PhyloView is the output type of a Phylo
368 data PhyloView = PhyloView
369 { _pv_param :: PhyloParam
370 , _pv_title :: Text
371 , _pv_description :: Text
372 , _pv_filiation :: Filiation
373 , _pv_level :: Level
374 , _pv_periods :: [PhyloPeriodId]
375 , _pv_metrics :: Map Text [Double]
376 , _pv_branches :: [PhyloBranch]
377 , _pv_nodes :: [PhyloNode]
378 , _pv_edges :: [PhyloEdge]
379 } deriving (Generic, Show)
380
381 -- | A phyloview is made of PhyloBranches, edges and nodes
382 data PhyloBranch = PhyloBranch
383 { _pb_id :: PhyloBranchId
384 , _pb_peak :: Text
385 , _pb_metrics :: Map Text [Double]
386 } deriving (Generic, Show)
387
388 data PhyloEdge = PhyloEdge
389 { _pe_source :: PhyloGroupId
390 , _pe_target :: PhyloGroupId
391 , _pe_type :: EdgeType
392 , _pe_weight :: Weight
393 } deriving (Generic, Show)
394
395 data PhyloNode = PhyloNode
396 { _pn_id :: PhyloGroupId
397 , _pn_bid :: Maybe PhyloBranchId
398 , _pn_label :: Text
399 , _pn_idx :: [Int]
400 , _pn_ngrams :: Maybe [Ngrams]
401 , _pn_metrics :: Map Text [Double]
402 , _pn_parents :: Maybe [PhyloGroupId]
403 , _pn_childs :: [PhyloNode]
404 } deriving (Generic, Show)
405
406 ------------------------
407 -- | PhyloQueryView | --
408 ------------------------
409
410
411 data ExportMode = Json | Dot | Svg
412 deriving (Generic, Show, Read)
413 data DisplayMode = Flat | Nested
414 deriving (Generic, Show, Read)
415
416 -- | A PhyloQueryView describes a Phylo as an output view
417 data PhyloQueryView = PhyloQueryView
418 { _qv_lvl :: Level
419
420 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
421 , _qv_filiation :: Filiation
422
423 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
424 , _qv_levelChilds :: Bool
425 , _qv_levelChildsDepth :: Level
426
427 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
428 -- Firstly the metrics, then the filters and the taggers
429 , _qv_metrics :: [Metric]
430 , _qv_filters :: [Filter]
431 , _qv_taggers :: [Tagger]
432
433 -- An asc or desc sort to apply to the PhyloGraph
434 , _qv_sort :: Maybe (Sort,Order)
435
436 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
437 , _qv_export :: ExportMode
438 , _qv_display :: DisplayMode
439 , _qv_verbose :: Bool
440 }
441
442
443 ----------------
444 -- | Lenses | --
445 ----------------
446
447
448 makeLenses ''PhyloParam
449 makeLenses ''Software
450 --
451 makeLenses ''Phylo
452 makeLenses ''PhyloFoundations
453 makeLenses ''PhyloGroup
454 makeLenses ''PhyloLevel
455 makeLenses ''PhyloPeriod
456 makeLenses ''PhyloFis
457 --
458 makeLenses ''Proximity
459 makeLenses ''Cluster
460 makeLenses ''Filter
461 --
462 makeLenses ''PhyloQueryBuild
463 makeLenses ''PhyloQueryView
464 --
465 makeLenses ''PhyloView
466 makeLenses ''PhyloBranch
467 makeLenses ''PhyloNode
468 makeLenses ''PhyloEdge
469
470
471 ------------------------
472 -- | JSON instances | --
473 ------------------------
474
475
476 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
477 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
478 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
479 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
480 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
481 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
482 --
483 $(deriveJSON (unPrefix "_software_" ) ''Software )
484 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
485 --
486 $(deriveJSON defaultOptions ''Filter )
487 $(deriveJSON defaultOptions ''Metric )
488 $(deriveJSON defaultOptions ''Cluster )
489 $(deriveJSON defaultOptions ''Proximity )
490 --
491 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
492 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
493 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
494 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
495 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
496 --
497 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
498 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
499 --
500 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
501 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
502 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
503 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
504 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
505
506 $(deriveJSON defaultOptions ''Filiation )
507 $(deriveJSON defaultOptions ''EdgeType )
508
509
510 ----------------------------
511 -- | TODO XML instances | --
512 ----------------------------
513