]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
issue with System.Directory
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
46
47 --------------------
48 -- | PhyloParam | --
49 --------------------
50
51
52 -- | Global parameters of a Phylo
53 data PhyloParam =
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
58
59
60 -- | Software parameters
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
65
66
67 ---------------
68 -- | Phylo | --
69 ---------------
70
71
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
76 data Phylo =
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
82 , _phylo_fis :: Map (Date,Date) [PhyloFis]
83 , _phylo_param :: PhyloParam
84 }
85 deriving (Generic, Show, Eq)
86
87
88 -- | The foundations of a phylomemy created from a given TermList
89 data PhyloFoundations =
90 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
91 , _phylo_foundationsTermsList :: TermList
92 } deriving (Generic, Show, Eq)
93
94
95 -- | Date : a simple Integer
96 type Date = Int
97
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
101 type Start = Date
102 type End = Date
103
104
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
108
109
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
113 data PhyloPeriod =
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
116 }
117 deriving (Generic, Show, Eq)
118
119
120 --------------------
121 -- | PhyloLevel | --
122 --------------------
123
124
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
131 data PhyloLevel =
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
134 }
135 deriving (Generic, Show, Eq)
136
137
138 --------------------
139 -- | PhyloGroup | --
140 --------------------
141
142
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 data PhyloGroup =
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupBranchId :: Maybe PhyloBranchId
156 , _phylo_groupCooc :: Map (Int,Int) Double
157
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
160
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
163 }
164 deriving (Generic, Show, Eq, Ord)
165
166
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 type Level = Int
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
170 type Index = Int
171
172
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
177
178
179 -- | Weight : A generic mesure that can be associated with an Id
180 type Weight = Double
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
184 type Ngrams = Text
185
186
187 --------------------
188 -- | Aggregates | --
189 --------------------
190
191
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
194 { date :: Date
195 , text :: [Ngrams]
196 } deriving (Show,Generic)
197
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
201 type Support = Int
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_period :: (Date,Date)
207 } deriving (Generic,Show,Eq)
208
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
211
212
213 -- | A PhyloGroup in a Graph
214 type GroupNode = PhyloGroup
215 -- | A weighted links between two PhyloGroups in a Graph
216 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = ([GroupNode],[GroupEdge])
219
220
221 ---------------
222 -- | Error | --
223 ---------------
224
225
226 data PhyloError = LevelDoesNotExist
227 | LevelUnassigned
228 deriving (Show)
229
230
231 -----------------
232 -- | Cluster | --
233 -----------------
234
235
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq, Read)
241
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 , _fis_minSize :: Int
247 } deriving (Generic, Show, Eq, Read)
248
249 -- | Parameters for RelatedComponents clustering
250 data RCParams = RCParams
251 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
252
253 -- | Parameters for Louvain clustering
254 data LouvainParams = LouvainParams
255 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
256
257
258 -------------------
259 -- | Proximity | --
260 -------------------
261
262
263 -- | Proximity constructors
264 data Proximity = WeightedLogJaccard WLJParams
265 | Hamming HammingParams
266 | Filiation
267 deriving (Generic, Show, Eq, Read)
268
269 -- | Parameters for WeightedLogJaccard proximity
270 data WLJParams = WLJParams
271 { _wlj_threshold :: Double
272 , _wlj_sensibility :: Double
273 } deriving (Generic, Show, Eq, Read)
274
275 -- | Parameters for Hamming proximity
276 data HammingParams = HammingParams
277 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
278
279
280 ----------------
281 -- | Filter | --
282 ----------------
283
284
285 -- | Filter constructors
286 data Filter = LonelyBranch LBParams
287 | SizeBranch SBParams
288 deriving (Generic, Show, Eq)
289
290 -- | Parameters for LonelyBranch filter
291 data LBParams = LBParams
292 { _lb_periodsInf :: Int
293 , _lb_periodsSup :: Int
294 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
295
296 -- | Parameters for SizeBranch filter
297 data SBParams = SBParams
298 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
299
300
301 ----------------
302 -- | Metric | --
303 ----------------
304
305
306 -- | Metric constructors
307 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
308
309
310 ----------------
311 -- | Tagger | --
312 ----------------
313
314
315 -- | Tagger constructors
316 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
317
318
319 --------------
320 -- | Sort | --
321 --------------
322
323
324 -- | Sort constructors
325 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
326 data Order = Asc | Desc deriving (Generic, Show, Read)
327
328
329 --------------------
330 -- | PhyloQuery | --
331 --------------------
332
333
334 -- | A Phyloquery describes a phylomemic reconstruction
335 data PhyloQueryBuild = PhyloQueryBuild
336 { _q_phyloTitle :: Text
337 , _q_phyloDesc :: Text
338
339 -- Grain and Steps for the PhyloPeriods
340 , _q_periodGrain :: Int
341 , _q_periodSteps :: Int
342
343 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
344 , _q_contextualUnit :: Cluster
345 , _q_contextualUnitMetrics :: [Metric]
346 , _q_contextualUnitFilters :: [Filter]
347
348 -- Inter-temporal matching method of the Phylo
349 , _q_interTemporalMatching :: Proximity
350
351 -- Last level of reconstruction
352 , _q_nthLevel :: Level
353 -- Clustering method used from level 1 to nthLevel
354 , _q_nthCluster :: Cluster
355 } deriving (Generic, Show, Eq)
356
357 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
358 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
359 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
360
361 -------------------
362 -- | PhyloView | --
363 -------------------
364
365
366 -- | A PhyloView is the output type of a Phylo
367 data PhyloView = PhyloView
368 { _pv_param :: PhyloParam
369 , _pv_title :: Text
370 , _pv_description :: Text
371 , _pv_filiation :: Filiation
372 , _pv_level :: Level
373 , _pv_periods :: [PhyloPeriodId]
374 , _pv_metrics :: Map Text [Double]
375 , _pv_branches :: [PhyloBranch]
376 , _pv_nodes :: [PhyloNode]
377 , _pv_edges :: [PhyloEdge]
378 } deriving (Generic, Show)
379
380 -- | A phyloview is made of PhyloBranches, edges and nodes
381 data PhyloBranch = PhyloBranch
382 { _pb_id :: PhyloBranchId
383 , _pb_peak :: Text
384 , _pb_metrics :: Map Text [Double]
385 } deriving (Generic, Show)
386
387 data PhyloEdge = PhyloEdge
388 { _pe_source :: PhyloGroupId
389 , _pe_target :: PhyloGroupId
390 , _pe_type :: EdgeType
391 , _pe_weight :: Weight
392 } deriving (Generic, Show)
393
394 data PhyloNode = PhyloNode
395 { _pn_id :: PhyloGroupId
396 , _pn_bid :: Maybe PhyloBranchId
397 , _pn_label :: Text
398 , _pn_idx :: [Int]
399 , _pn_ngrams :: Maybe [Ngrams]
400 , _pn_metrics :: Map Text [Double]
401 , _pn_parents :: Maybe [PhyloGroupId]
402 , _pn_childs :: [PhyloNode]
403 } deriving (Generic, Show)
404
405 ------------------------
406 -- | PhyloQueryView | --
407 ------------------------
408
409
410 data ExportMode = Json | Dot | Svg
411 deriving (Generic, Show, Read)
412 data DisplayMode = Flat | Nested
413 deriving (Generic, Show, Read)
414
415 -- | A PhyloQueryView describes a Phylo as an output view
416 data PhyloQueryView = PhyloQueryView
417 { _qv_lvl :: Level
418
419 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
420 , _qv_filiation :: Filiation
421
422 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
423 , _qv_levelChilds :: Bool
424 , _qv_levelChildsDepth :: Level
425
426 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
427 -- Firstly the metrics, then the filters and the taggers
428 , _qv_metrics :: [Metric]
429 , _qv_filters :: [Filter]
430 , _qv_taggers :: [Tagger]
431
432 -- An asc or desc sort to apply to the PhyloGraph
433 , _qv_sort :: Maybe (Sort,Order)
434
435 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
436 , _qv_export :: ExportMode
437 , _qv_display :: DisplayMode
438 , _qv_verbose :: Bool
439 }
440
441
442 ----------------
443 -- | Lenses | --
444 ----------------
445
446
447 makeLenses ''PhyloParam
448 makeLenses ''Software
449 --
450 makeLenses ''Phylo
451 makeLenses ''PhyloFoundations
452 makeLenses ''PhyloGroup
453 makeLenses ''PhyloLevel
454 makeLenses ''PhyloPeriod
455 makeLenses ''PhyloFis
456 --
457 makeLenses ''Proximity
458 makeLenses ''Cluster
459 makeLenses ''Filter
460 --
461 makeLenses ''PhyloQueryBuild
462 makeLenses ''PhyloQueryView
463 --
464 makeLenses ''PhyloView
465 makeLenses ''PhyloBranch
466 makeLenses ''PhyloNode
467 makeLenses ''PhyloEdge
468
469
470 ------------------------
471 -- | JSON instances | --
472 ------------------------
473
474
475 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
476 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
477 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
478 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
479 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
480 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
481 --
482 $(deriveJSON (unPrefix "_software_" ) ''Software )
483 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
484 --
485 $(deriveJSON defaultOptions ''Filter )
486 $(deriveJSON defaultOptions ''Metric )
487 $(deriveJSON defaultOptions ''Cluster )
488 $(deriveJSON defaultOptions ''Proximity )
489 --
490 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
491 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
492 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
493 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
494 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
495 --
496 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
497 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
498 --
499 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
500 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
501 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
502 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
503 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
504
505 $(deriveJSON defaultOptions ''Filiation )
506 $(deriveJSON defaultOptions ''EdgeType )
507
508
509 ----------------------------
510 -- | TODO XML instances | --
511 ----------------------------
512