]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
add a new group by docs
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
37 import Data.Set (Set)
38 import Data.Map (Map)
39 import Data.Vector (Vector)
40 import Data.Swagger
41 --import Data.Time.Clock.POSIX (POSIXTime)
42 import GHC.Generics (Generic)
43 --import Gargantext.Database.Schema.Ngrams (NgramsId)
44 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
45 import Gargantext.Text.Context (TermList)
46 import Gargantext.Prelude
47
48 import Control.DeepSeq
49
50 --------------------
51 -- | PhyloParam | --
52 --------------------
53
54
55 -- | Global parameters of a Phylo
56 data PhyloParam =
57 PhyloParam { _phyloParam_version :: Text -- Double ?
58 , _phyloParam_software :: Software
59 , _phyloParam_query :: PhyloQueryBuild
60 } deriving (Generic, Show, Eq)
61
62
63 -- | Software parameters
64 data Software =
65 Software { _software_name :: Text
66 , _software_version :: Text
67 } deriving (Generic, Show, Eq)
68
69
70 ---------------
71 -- | Phylo | --
72 ---------------
73
74
75 -- | Phylo datatype of a phylomemy
76 -- Duration : time Segment of the whole Phylo
77 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
78 -- Periods : list of all the periods of a Phylo
79 data Phylo =
80 Phylo { _phylo_duration :: (Start, End)
81 , _phylo_foundations :: PhyloFoundations
82 , _phylo_periods :: [PhyloPeriod]
83 , _phylo_docsByYears :: Map Date Double
84 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
85 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
86 , _phylo_param :: PhyloParam
87 }
88 deriving (Generic, Show, Eq)
89
90
91 -- | The foundations of a phylomemy created from a given TermList
92 data PhyloFoundations =
93 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
94 , _phylo_foundationsTermsList :: TermList
95 } deriving (Generic, Show, Eq)
96
97
98 -- | Date : a simple Integer
99 type Date = Int
100
101 -- | UTCTime in seconds since UNIX epoch
102 -- type Start = POSIXTime
103 -- type End = POSIXTime
104 type Start = Date
105 type End = Date
106
107
108 ---------------------
109 -- | PhyloPeriod | --
110 ---------------------
111
112
113 -- | PhyloStep : steps of phylomemy on temporal axis
114 -- Period: tuple (start date, end date) of the step of the phylomemy
115 -- Levels: levels of granularity
116 data PhyloPeriod =
117 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
118 , _phylo_periodLevels :: [PhyloLevel]
119 }
120 deriving (Generic, Show, Eq)
121
122
123 --------------------
124 -- | PhyloLevel | --
125 --------------------
126
127
128 -- | PhyloLevel : levels of phylomemy on level axis
129 -- Levels description:
130 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
131 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
132 -- Level 1: First level of clustering
133 -- Level N: Nth level of clustering
134 data PhyloLevel =
135 PhyloLevel { _phylo_levelId :: PhyloLevelId
136 , _phylo_levelGroups :: [PhyloGroup]
137 }
138 deriving (Generic, Show, Eq)
139
140
141 --------------------
142 -- | PhyloGroup | --
143 --------------------
144
145
146 -- | PhyloGroup : group of ngrams at each level and step
147 -- Label : maybe has a label as text
148 -- Ngrams: set of terms that build the group
149 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
150 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
151 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
152 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
153 data PhyloGroup =
154 PhyloGroup { _phylo_groupId :: PhyloGroupId
155 , _phylo_groupLabel :: Text
156 , _phylo_groupNgrams :: [Int]
157 , _phylo_groupNgramsMeta :: Map Text [Double]
158 , _phylo_groupMeta :: Map Text Double
159 , _phylo_groupBranchId :: Maybe PhyloBranchId
160 , _phylo_groupCooc :: !(Map (Int,Int) Double)
161
162 , _phylo_groupPeriodParents :: [Pointer]
163 , _phylo_groupPeriodChilds :: [Pointer]
164
165 , _phylo_groupLevelParents :: [Pointer]
166 , _phylo_groupLevelChilds :: [Pointer]
167 }
168 deriving (Generic, NFData, Show, Eq, Ord)
169
170 -- instance NFData PhyloGroup
171
172
173 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
174 type Level = Int
175 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
176 type Index = Int
177
178
179 type PhyloPeriodId = (Start, End)
180 type PhyloLevelId = (PhyloPeriodId, Level)
181 type PhyloGroupId = (PhyloLevelId, Index)
182 type PhyloBranchId = (Level, Index)
183
184
185 -- | Weight : A generic mesure that can be associated with an Id
186 type Weight = Double
187 -- | Pointer : A weighted linked with a given PhyloGroup
188 type Pointer = (PhyloGroupId, Weight)
189 -- | Ngrams : a contiguous sequence of n terms
190 type Ngrams = Text
191
192
193 --------------------
194 -- | Aggregates | --
195 --------------------
196
197
198 -- | Document : a piece of Text linked to a Date
199 data Document = Document
200 { date :: Date
201 , text :: [Ngrams]
202 } deriving (Show,Generic,NFData)
203
204 -- | Clique : Set of ngrams cooccurring in the same Document
205 type Clique = Set Ngrams
206 -- | Support : Number of Documents where a Clique occurs
207 type Support = Int
208 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
209 data PhyloFis = PhyloFis
210 { _phyloFis_clique :: Clique
211 , _phyloFis_support :: Support
212 , _phyloFis_period :: (Date,Date)
213 } deriving (Generic,NFData,Show,Eq)
214
215 -- | A list of clustered PhyloGroup
216 type PhyloCluster = [PhyloGroup]
217
218
219 -- | A PhyloGroup in a Graph
220 type GroupNode = PhyloGroup
221 -- | A weighted links between two PhyloGroups in a Graph
222 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
223 -- | The association as a Graph between a list of Nodes and a list of Edges
224 type GroupGraph = ([GroupNode],[GroupEdge])
225
226
227 ---------------
228 -- | Error | --
229 ---------------
230
231
232 data PhyloError = LevelDoesNotExist
233 | LevelUnassigned
234 deriving (Show)
235
236
237 -----------------
238 -- | Cluster | --
239 -----------------
240
241
242 -- | Cluster constructors
243 data Cluster = Fis FisParams
244 | RelatedComponents RCParams
245 | Louvain LouvainParams
246 deriving (Generic, Show, Eq, Read)
247
248 -- | Parameters for Fis clustering
249 data FisParams = FisParams
250 { _fis_keepMinorFis :: Bool
251 , _fis_minSupport :: Support
252 , _fis_minSize :: Int
253 } deriving (Generic, Show, Eq, Read)
254
255 -- | Parameters for RelatedComponents clustering
256 data RCParams = RCParams
257 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
258
259 -- | Parameters for Louvain clustering
260 data LouvainParams = LouvainParams
261 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
262
263
264 -------------------
265 -- | Proximity | --
266 -------------------
267
268
269 -- | Proximity constructors
270 data Proximity = WeightedLogJaccard WLJParams
271 | Hamming HammingParams
272 | Filiation
273 deriving (Generic, Show, Eq, Read)
274
275 -- | Parameters for WeightedLogJaccard proximity
276 data WLJParams = WLJParams
277 { _wlj_threshold :: Double
278 , _wlj_sensibility :: Double
279 } deriving (Generic, Show, Eq, Read)
280
281 -- | Parameters for Hamming proximity
282 data HammingParams = HammingParams
283 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
284
285
286 ----------------
287 -- | Filter | --
288 ----------------
289
290
291 -- | Filter constructors
292 data Filter = LonelyBranch LBParams
293 | SizeBranch SBParams
294 deriving (Generic, Show, Eq)
295
296 -- | Parameters for LonelyBranch filter
297 data LBParams = LBParams
298 { _lb_periodsInf :: Int
299 , _lb_periodsSup :: Int
300 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
301
302 -- | Parameters for SizeBranch filter
303 data SBParams = SBParams
304 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
305
306
307 ----------------
308 -- | Metric | --
309 ----------------
310
311
312 -- | Metric constructors
313 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
314
315
316 ----------------
317 -- | Tagger | --
318 ----------------
319
320
321 -- | Tagger constructors
322 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
323 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
324
325
326 --------------
327 -- | Sort | --
328 --------------
329
330
331 -- | Sort constructors
332 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
333 data Order = Asc | Desc deriving (Generic, Show, Read)
334
335
336 --------------------
337 -- | PhyloQuery | --
338 --------------------
339
340
341 -- | A Phyloquery describes a phylomemic reconstruction
342 data PhyloQueryBuild = PhyloQueryBuild
343 { _q_phyloTitle :: Text
344 , _q_phyloDesc :: Text
345
346 -- Grain and Steps for the PhyloPeriods
347 , _q_periodGrain :: Int
348 , _q_periodSteps :: Int
349
350 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
351 , _q_contextualUnit :: Cluster
352 , _q_contextualUnitMetrics :: [Metric]
353 , _q_contextualUnitFilters :: [Filter]
354
355 -- Inter-temporal matching method of the Phylo
356 , _q_interTemporalMatching :: Proximity
357 , _q_interTemporalMatchingFrame :: Int
358 , _q_interTemporalMatchingFrameTh :: Double
359
360 , _q_reBranchThr :: Double
361 , _q_reBranchNth :: Int
362
363 -- Last level of reconstruction
364 , _q_nthLevel :: Level
365 -- Clustering method used from level 1 to nthLevel
366 , _q_nthCluster :: Cluster
367 } deriving (Generic, Show, Eq)
368
369 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
370 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
371 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
372
373 -------------------
374 -- | PhyloView | --
375 -------------------
376
377
378 -- | A PhyloView is the output type of a Phylo
379 data PhyloView = PhyloView
380 { _pv_param :: PhyloParam
381 , _pv_title :: Text
382 , _pv_description :: Text
383 , _pv_filiation :: Filiation
384 , _pv_level :: Level
385 , _pv_periods :: [PhyloPeriodId]
386 , _pv_metrics :: Map Text [Double]
387 , _pv_branches :: [PhyloBranch]
388 , _pv_nodes :: [PhyloNode]
389 , _pv_edges :: [PhyloEdge]
390 } deriving (Generic, Show)
391
392 -- | A phyloview is made of PhyloBranches, edges and nodes
393 data PhyloBranch = PhyloBranch
394 { _pb_id :: PhyloBranchId
395 , _pb_peak :: Text
396 , _pb_metrics :: Map Text [Double]
397 } deriving (Generic, Show)
398
399 data PhyloEdge = PhyloEdge
400 { _pe_source :: PhyloGroupId
401 , _pe_target :: PhyloGroupId
402 , _pe_type :: EdgeType
403 , _pe_weight :: Weight
404 } deriving (Generic, Show)
405
406 data PhyloNode = PhyloNode
407 { _pn_id :: PhyloGroupId
408 , _pn_bid :: Maybe PhyloBranchId
409 , _pn_label :: Text
410 , _pn_idx :: [Int]
411 , _pn_ngrams :: Maybe [Ngrams]
412 , _pn_metrics :: Map Text [Double]
413 , _pn_cooc :: Map (Int,Int) Double
414 , _pn_parents :: Maybe [PhyloGroupId]
415 , _pn_childs :: [PhyloNode]
416 } deriving (Generic, Show)
417
418 ------------------------
419 -- | PhyloQueryView | --
420 ------------------------
421
422
423 data ExportMode = Json | Dot | Svg
424 deriving (Generic, Show, Read)
425 data DisplayMode = Flat | Nested
426 deriving (Generic, Show, Read)
427
428 -- | A PhyloQueryView describes a Phylo as an output view
429 data PhyloQueryView = PhyloQueryView
430 { _qv_lvl :: Level
431
432 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
433 , _qv_filiation :: Filiation
434
435 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
436 , _qv_levelChilds :: Bool
437 , _qv_levelChildsDepth :: Level
438
439 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
440 -- Firstly the metrics, then the filters and the taggers
441 , _qv_metrics :: [Metric]
442 , _qv_filters :: [Filter]
443 , _qv_taggers :: [Tagger]
444
445 -- An asc or desc sort to apply to the PhyloGraph
446 , _qv_sort :: Maybe (Sort,Order)
447
448 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
449 , _qv_export :: ExportMode
450 , _qv_display :: DisplayMode
451 , _qv_verbose :: Bool
452 }
453
454
455 ----------------
456 -- | Lenses | --
457 ----------------
458
459
460 makeLenses ''PhyloParam
461 makeLenses ''Software
462 --
463 makeLenses ''Phylo
464 makeLenses ''PhyloFoundations
465 makeLenses ''PhyloGroup
466 makeLenses ''PhyloLevel
467 makeLenses ''PhyloPeriod
468 makeLenses ''PhyloFis
469 --
470 makeLenses ''Proximity
471 makeLenses ''Cluster
472 makeLenses ''Filter
473 --
474 makeLenses ''PhyloQueryBuild
475 makeLenses ''PhyloQueryView
476 --
477 makeLenses ''PhyloView
478 makeLenses ''PhyloBranch
479 makeLenses ''PhyloNode
480 makeLenses ''PhyloEdge
481
482
483 ------------------------
484 -- | JSON instances | --
485 ------------------------
486
487
488 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
489 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
490 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
491 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
492 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
493 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
494 --
495 $(deriveJSON (unPrefix "_software_" ) ''Software )
496 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
497 --
498 $(deriveJSON defaultOptions ''Filter )
499 $(deriveJSON defaultOptions ''Metric )
500 $(deriveJSON defaultOptions ''Cluster )
501 $(deriveJSON defaultOptions ''Proximity )
502 --
503 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
504 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
505 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
506 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
507 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
508 --
509 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
510 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
511 --
512 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
513 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
514 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
515 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
516 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
517
518 $(deriveJSON defaultOptions ''Filiation )
519 $(deriveJSON defaultOptions ''EdgeType )
520
521 ---------------------------
522 -- | Swagger instances | --
523 ---------------------------
524
525 instance ToSchema Phylo where
526 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
527 instance ToSchema PhyloFoundations where
528 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
529 instance ToSchema PhyloPeriod where
530 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
531 instance ToSchema PhyloLevel where
532 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
533 instance ToSchema PhyloGroup where
534 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
535 instance ToSchema PhyloFis where
536 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
537 instance ToSchema Software where
538 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
539 instance ToSchema PhyloParam where
540 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
541 instance ToSchema Filter
542 instance ToSchema Metric
543 instance ToSchema Cluster
544 instance ToSchema Proximity where
545 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
546 instance ToSchema FisParams where
547 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
548 instance ToSchema HammingParams where
549 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
550 instance ToSchema LouvainParams where
551 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
552 instance ToSchema RCParams where
553 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
554 instance ToSchema WLJParams where
555 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
556 instance ToSchema LBParams where
557 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
558 instance ToSchema SBParams where
559 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
560 instance ToSchema PhyloQueryBuild where
561 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
562 instance ToSchema PhyloView where
563 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
564 instance ToSchema PhyloBranch where
565 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
566 instance ToSchema PhyloEdge where
567 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
568 instance ToSchema PhyloNode where
569 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
570 instance ToSchema Filiation
571 instance ToSchema EdgeType
572
573 ----------------------------
574 -- | TODO XML instances | --
575 ----------------------------
576