]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
working on phyloPeaks
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45
46 --------------------
47 -- | PhyloParam | --
48 --------------------
49
50
51 -- | Global parameters of a Phylo
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQuery
56 } deriving (Generic, Show)
57
58
59 -- | Software parameters
60 data Software =
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show)
64
65
66 ---------------
67 -- | Phylo | --
68 ---------------
69
70
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
74 -- Periods : list of all the periods of a Phylo
75 data Phylo =
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 -- , _phylo_peaks :: PhyloPeaks
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
81 }
82 deriving (Generic, Show)
83
84 -- data PhyloPeaks =
85 -- PhyloPeaks { _phylo_peaksLabel :: Vector Ngrams
86 -- , _phylo_peaksTrees :: [(Ngrams, TreeNgrams)]
87 -- }
88 -- deriving (Generic, Show)
89
90
91 -- | Date : a simple Integer
92 type Date = Int
93
94 -- | UTCTime in seconds since UNIX epoch
95 -- type Start = POSIXTime
96 -- type End = POSIXTime
97 type Start = Date
98 type End = Date
99
100
101 ---------------------
102 -- | PhyloPeriod | --
103 ---------------------
104
105
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
109 data PhyloPeriod =
110 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
111 , _phylo_periodLevels :: [PhyloLevel]
112 }
113 deriving (Generic, Show)
114
115
116 --------------------
117 -- | PhyloLevel | --
118 --------------------
119
120
121 -- | PhyloLevel : levels of phylomemy on level axis
122 -- Levels description:
123 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
124 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
125 -- Level 1: First level of clustering
126 -- Level N: Nth level of clustering
127 data PhyloLevel =
128 PhyloLevel { _phylo_levelId :: PhyloLevelId
129 , _phylo_levelGroups :: [PhyloGroup]
130 }
131 deriving (Generic, Show)
132
133
134 --------------------
135 -- | PhyloGroup | --
136 --------------------
137
138
139 -- | PhyloGroup : group of ngrams at each level and step
140 -- Label : maybe has a label as text
141 -- Ngrams: set of terms that build the group
142 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
143 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
144 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
145 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
146 data PhyloGroup =
147 PhyloGroup { _phylo_groupId :: PhyloGroupId
148 , _phylo_groupLabel :: Text
149 , _phylo_groupNgrams :: [Int]
150 , _phylo_groupMeta :: Map Text Double
151 , _phylo_groupCooc :: Map (Int, Int) Double
152 , _phylo_groupBranchId :: Maybe PhyloBranchId
153
154 , _phylo_groupPeriodParents :: [Pointer]
155 , _phylo_groupPeriodChilds :: [Pointer]
156
157 , _phylo_groupLevelParents :: [Pointer]
158 , _phylo_groupLevelChilds :: [Pointer]
159 }
160 deriving (Generic, Show, Eq, Ord)
161
162
163 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
164 type Level = Int
165 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
166 type Index = Int
167
168
169 type PhyloPeriodId = (Start, End)
170 type PhyloLevelId = (PhyloPeriodId, Level)
171 type PhyloGroupId = (PhyloLevelId, Index)
172 type PhyloBranchId = (Level, Index)
173
174
175 -- | Weight : A generic mesure that can be associated with an Id
176 type Weight = Double
177 -- | Pointer : A weighted linked with a given PhyloGroup
178 type Pointer = (PhyloGroupId, Weight)
179 -- | Ngrams : a contiguous sequence of n terms
180 type Ngrams = Text
181
182
183 --------------------
184 -- | Aggregates | --
185 --------------------
186
187
188 -- | Document : a piece of Text linked to a Date
189 data Document = Document
190 { date :: Date
191 , text :: Text
192 } deriving (Show)
193
194
195 -- | Clique : Set of ngrams cooccurring in the same Document
196 type Clique = Set Ngrams
197 -- | Support : Number of Documents where a Clique occurs
198 type Support = Int
199 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
200 type PhyloFis = (Clique,Support)
201
202
203 -- | A list of clustered PhyloGroup
204 type PhyloCluster = [PhyloGroup]
205
206
207 -- | A List of PhyloGroup in a Graph
208 type GroupNodes = [PhyloGroup]
209 -- | A List of weighted links between some PhyloGroups in a Graph
210 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
211 -- | The association as a Graph between a list of Nodes and a list of Edges
212 type GroupGraph = (GroupNodes,GroupEdges)
213
214
215 ---------------
216 -- | Error | --
217 ---------------
218
219
220 data PhyloError = LevelDoesNotExist
221 | LevelUnassigned
222 deriving (Show)
223
224
225 -----------------
226 -- | Cluster | --
227 -----------------
228
229
230 -- | Cluster constructors
231 data Cluster = Fis FisParams
232 | RelatedComponents RCParams
233 | Louvain LouvainParams
234 deriving (Show)
235
236 -- | Parameters for Fis clustering
237 data FisParams = FisParams
238 { _fis_filtered :: Bool
239 , _fis_keepMinorFis :: Bool
240 , _fis_minSupport :: Support
241 } deriving (Show)
242
243 -- | Parameters for RelatedComponents clustering
244 data RCParams = RCParams
245 { _rc_proximity :: Proximity } deriving (Show)
246
247 -- | Parameters for Louvain clustering
248 data LouvainParams = LouvainParams
249 { _louvain_proximity :: Proximity } deriving (Show)
250
251
252 -------------------
253 -- | Proximity | --
254 -------------------
255
256
257 -- | Proximity constructors
258 data Proximity = WeightedLogJaccard WLJParams
259 | Hamming HammingParams
260 | Filiation
261 deriving (Show)
262
263 -- | Parameters for WeightedLogJaccard proximity
264 data WLJParams = WLJParams
265 { _wlj_threshold :: Double
266 , _wlj_sensibility :: Double
267 } deriving (Show)
268
269 -- | Parameters for Hamming proximity
270 data HammingParams = HammingParams
271 { _hamming_threshold :: Double } deriving (Show)
272
273
274 ----------------
275 -- | Filter | --
276 ----------------
277
278
279 -- | Filter constructors
280 data Filter = SmallBranch SBParams deriving (Show)
281
282 -- | Parameters for SmallBranch filter
283 data SBParams = SBParams
284 { _sb_periodsInf :: Int
285 , _sb_periodsSup :: Int
286 , _sb_minNodes :: Int } deriving (Show)
287
288
289 ----------------
290 -- | Metric | --
291 ----------------
292
293
294 -- | Metric constructors
295 data Metric = BranchAge deriving (Show)
296
297
298 ----------------
299 -- | Tagger | --
300 ----------------
301
302
303 -- | Tagger constructors
304 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
305
306
307 --------------
308 -- | Sort | --
309 --------------
310
311
312 -- | Sort constructors
313 data Sort = ByBranchAge deriving (Show)
314 data Order = Asc | Desc deriving (Show)
315
316
317 --------------------
318 -- | PhyloQuery | --
319 --------------------
320
321
322 -- | A Phyloquery describes a phylomemic reconstruction
323 data PhyloQuery = PhyloQuery
324 { _q_phyloTitle :: Text
325 , _q_phyloDesc :: Text
326
327 -- Grain and Steps for the PhyloPeriods
328 , _q_periodGrain :: Int
329 , _q_periodSteps :: Int
330
331 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
332 , _q_contextualUnit :: Cluster
333
334 -- Inter-temporal matching method of the Phylo
335 , _q_interTemporalMatching :: Proximity
336
337 -- Last level of reconstruction
338 , _q_nthLevel :: Level
339 -- Clustering method used from level 1 to nthLevel
340 , _q_nthCluster :: Cluster
341 } deriving (Show)
342
343 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
344 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Show)
345 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
346
347
348 -------------------
349 -- | PhyloView | --
350 -------------------
351
352
353 -- | A PhyloView is the output type of a Phylo
354 data PhyloView = PhyloView
355 { _phylo_viewParam :: PhyloParam
356 , _phylo_viewTitle :: Text
357 , _phylo_viewDescription :: Text
358 , _phylo_viewFiliation :: Filiation
359 , _phylo_viewMetrics :: Map Text [Double]
360 , _phylo_viewBranches :: [PhyloBranch]
361 , _phylo_viewNodes :: [PhyloNode]
362 , _phylo_viewEdges :: [PhyloEdge]
363 } deriving (Show)
364
365 -- | A phyloview is made of PhyloBranches, edges and nodes
366 data PhyloBranch = PhyloBranch
367 { _phylo_branchId :: PhyloBranchId
368 , _phylo_branchLabel :: Text
369 , _phylo_branchMetrics :: Map Text [Double]
370 } deriving (Show)
371
372 data PhyloEdge = PhyloEdge
373 { _phylo_edgeSource :: PhyloGroupId
374 , _phylo_edgeTarget :: PhyloGroupId
375 , _phylo_edgeType :: EdgeType
376 , _phylo_edgeWeight :: Weight
377 } deriving (Show)
378
379 data PhyloNode = PhyloNode
380 { _phylo_nodeId :: PhyloGroupId
381 , _phylo_nodeBranchId :: Maybe PhyloBranchId
382 , _phylo_nodeLabel :: Text
383 , _phylo_nodeNgramsIdx :: [Int]
384 , _phylo_nodeNgrams :: Maybe [Ngrams]
385 , _phylo_nodeMetrics :: Map Text [Double]
386 , _phylo_nodeLevelParents :: Maybe [PhyloGroupId]
387 , _phylo_nodeLevelChilds :: [PhyloNode]
388 } deriving (Show)
389
390
391 ------------------------
392 -- | PhyloQueryView | --
393 ------------------------
394
395
396 data DisplayMode = Flat | Nested
397
398 -- | A PhyloQueryView describes a Phylo as an output view
399 data PhyloQueryView = PhyloQueryView
400 { _qv_lvl :: Level
401
402 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
403 , _qv_filiation :: Filiation
404
405 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
406 , _qv_levelChilds :: Bool
407 , _qv_levelChildsDepth :: Level
408
409 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
410 -- Firstly the metrics, then the filters and the taggers
411 , _qv_metrics :: [Metric]
412 , _qv_filters :: [Filter]
413 , _qv_taggers :: [Tagger]
414
415 -- An asc or desc sort to apply to the PhyloGraph
416 , _qv_sort :: Maybe (Sort,Order)
417
418 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
419 , _qv_display :: DisplayMode
420 , _qv_verbose :: Bool
421 }
422
423
424 ----------------
425 -- | Lenses | --
426 ----------------
427
428
429 makeLenses ''PhyloParam
430 makeLenses ''Software
431 --
432 makeLenses ''Phylo
433 makeLenses ''PhyloGroup
434 makeLenses ''PhyloLevel
435 makeLenses ''PhyloPeriod
436 --
437 makeLenses ''Proximity
438 makeLenses ''Cluster
439 makeLenses ''Filter
440 --
441 makeLenses ''PhyloQuery
442 makeLenses ''PhyloQueryView
443 --
444 makeLenses ''PhyloView
445 makeLenses ''PhyloBranch
446 makeLenses ''PhyloNode
447 makeLenses ''PhyloEdge
448
449
450 ------------------------
451 -- | JSON instances | --
452 ------------------------
453
454
455 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
456 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
457 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
458 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
459 --
460 $(deriveJSON (unPrefix "_software_" ) ''Software )
461 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
462 --
463 $(deriveJSON defaultOptions ''Cluster )
464 $(deriveJSON defaultOptions ''Proximity )
465 --
466 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
467 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
468 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
469 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
470 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
471 --
472 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
473
474
475 ----------------------------
476 -- | TODO XML instances | --
477 ----------------------------
478