2 Module : Gargantext.Core.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveAnyClass #-}
26 {-# LANGUAGE TemplateHaskell #-}
28 module Gargantext.Core.Viz.Phylo where
30 import Control.DeepSeq
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON,defaultOptions)
36 import Data.Text (Text)
37 import Data.Vector (Vector)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Prelude
41 import Gargantext.Core.Text.Context (TermList)
48 -- | Global parameters of a Phylo
50 PhyloParam { _phyloParam_version :: !Text -- Double ?
51 , _phyloParam_software :: !Software
52 , _phyloParam_query :: !PhyloQueryBuild
53 } deriving (Generic, Show, Eq)
56 -- | Software parameters
58 Software { _software_name :: !Text
59 , _software_version :: !Text
60 } deriving (Generic, Show, Eq)
68 -- | Phylo datatype of a phylomemy
69 -- Duration : time Segment of the whole Phylo
70 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
71 -- Periods : list of all the periods of a Phylo
73 Phylo { _phylo_duration :: !(Start, End)
74 , _phylo_foundations :: !PhyloFoundations
75 , _phylo_periods :: [PhyloPeriod]
76 , _phylo_docsByYears :: !(Map Date Double)
77 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
78 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
79 , _phylo_param :: !PhyloParam
81 deriving (Generic, Show, Eq)
84 -- | The foundations of a phylomemy created from a given TermList
85 data PhyloFoundations =
86 PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
87 , _phylo_foundationsTermsList :: !TermList
88 } deriving (Generic, Show, Eq)
91 -- | Date : a simple Integer
94 -- | UTCTime in seconds since UNIX epoch
95 -- type Start = POSIXTime
96 -- type End = POSIXTime
101 ---------------------
102 -- | PhyloPeriod | --
103 ---------------------
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
110 PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
111 , _phylo_periodLevels :: ![PhyloLevel]
113 deriving (Generic, Show, Eq)
121 -- | PhyloLevel : levels of phylomemy on level axis
122 -- Levels description:
123 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
124 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
125 -- Level 1: First level of clustering
126 -- Level N: Nth level of clustering
128 PhyloLevel { _phylo_levelId :: !PhyloLevelId
129 , _phylo_levelGroups :: ![PhyloGroup]
131 deriving (Generic, Show, Eq)
139 -- | PhyloGroup : group of ngrams at each level and step
140 -- Label : maybe has a label as text
141 -- Ngrams: set of terms that build the group
142 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
143 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
144 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
145 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
147 PhyloGroup { _phylo_groupId :: !PhyloGroupId
148 , _phylo_groupLabel :: !Text
149 , _phylo_groupNgrams :: ![Int]
150 , _phylo_groupNgramsMeta :: !(Map Text [Double])
151 , _phylo_groupMeta :: !(Map Text Double)
152 , _phylo_groupBranchId :: !(Maybe PhyloBranchId)
153 , _phylo_groupCooc :: !(Map (Int,Int) Double)
155 , _phylo_groupPeriodParents :: ![Pointer]
156 , _phylo_groupPeriodChilds :: ![Pointer]
158 , _phylo_groupLevelParents :: ![Pointer]
159 , _phylo_groupLevelChilds :: ![Pointer]
161 deriving (Generic, NFData, Show, Eq, Ord)
163 -- instance NFData PhyloGroup
166 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
172 type PhyloPeriodId = (Start, End)
173 type PhyloLevelId = (PhyloPeriodId, Level)
174 type PhyloGroupId = (PhyloLevelId, Index)
175 type PhyloBranchId = (Level, Index)
178 -- | Weight : A generic mesure that can be associated with an Id
180 -- | Pointer : A weighted linked with a given PhyloGroup
181 type Pointer = (PhyloGroupId, Weight)
182 -- | Ngrams : a contiguous sequence of n terms
191 -- | Document : a piece of Text linked to a Date
192 data Document = Document
195 } deriving (Show,Generic,NFData)
197 -- | Clique : Set of ngrams cooccurring in the same Document
198 type Clique = Set Ngrams
199 -- | Support : Number of Documents where a Clique occurs
201 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
202 data PhyloFis = PhyloFis
203 { _phyloFis_clique :: !Clique
204 , _phyloFis_support :: !Support
205 , _phyloFis_period :: !(Date,Date)
206 } deriving (Generic,NFData,Show,Eq)
208 -- | A list of clustered PhyloGroup
209 type PhyloCluster = [PhyloGroup]
212 -- | A PhyloGroup in a Graph
213 type GroupNode = PhyloGroup
214 -- | A weighted links between two PhyloGroups in a Graph
215 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
216 -- | The association as a Graph between a list of Nodes and a list of Edges
217 type GroupGraph = ([GroupNode],[GroupEdge])
225 data PhyloError = LevelDoesNotExist
235 -- | Cluster constructors
236 data Cluster = Fis FisParams
237 | RelatedComponents RCParams
238 | Louvain LouvainParams
239 deriving (Generic, Show, Eq, Read)
241 -- | Parameters for Fis clustering
242 data FisParams = FisParams
243 { _fis_keepMinorFis :: !Bool
244 , _fis_minSupport :: !Support
245 , _fis_minSize :: !Int
246 } deriving (Generic, Show, Eq, Read)
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | WeightedLogSim WLJParams
265 | Hamming HammingParams
267 deriving (Generic, Show, Eq, Read)
269 -- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
270 data WLJParams = WLJParams
271 { _wlj_threshold :: !Double
272 , _wlj_sensibility :: !Double
273 } deriving (Generic, Show, Eq, Read)
275 -- | Parameters for Hamming proximity
276 data HammingParams = HammingParams
277 { _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
285 -- | Filter constructors
286 data Filter = LonelyBranch LBParams
287 | SizeBranch SBParams
288 deriving (Generic, Show, Eq)
290 -- | Parameters for LonelyBranch filter
291 data LBParams = LBParams
292 { _lb_periodsInf :: !Int
293 , _lb_periodsSup :: !Int
294 , _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
296 -- | Parameters for SizeBranch filter
297 data SBParams = SBParams
298 { _sb_minSize :: !Int } deriving (Generic, Show, Eq)
306 -- | Metric constructors
307 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
315 -- | Tagger constructors
316 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
317 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
325 -- | Sort constructors
326 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
327 data Order = Asc | Desc deriving (Generic, Show, Read)
335 -- | A Phyloquery describes a phylomemic reconstruction
336 data PhyloQueryBuild = PhyloQueryBuild
337 { _q_phyloTitle :: !Text
338 , _q_phyloDesc :: !Text
340 -- Grain and Steps for the PhyloPeriods
341 , _q_periodGrain :: !Int
342 , _q_periodSteps :: !Int
344 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
345 , _q_contextualUnit :: !Cluster
346 , _q_contextualUnitMetrics :: ![Metric]
347 , _q_contextualUnitFilters :: ![Filter]
349 -- Inter-temporal matching method of the Phylo
350 , _q_interTemporalMatching :: !Proximity
351 , _q_interTemporalMatchingFrame :: !Int
352 , _q_interTemporalMatchingFrameTh :: !Double
354 , _q_reBranchThr :: !Double
355 , _q_reBranchNth :: !Int
357 -- Last level of reconstruction
358 , _q_nthLevel :: !Level
359 -- Clustering method used from level 1 to nthLevel
360 , _q_nthCluster :: !Cluster
361 } deriving (Generic, Show, Eq)
363 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
364 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
365 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
372 -- | A PhyloView is the output type of a Phylo
373 data PhyloView = PhyloView
374 { _pv_param :: !PhyloParam
376 , _pv_description :: !Text
377 , _pv_filiation :: !Filiation
378 , _pv_level :: !Level
379 , _pv_periods :: ![PhyloPeriodId]
380 , _pv_metrics :: !(Map Text [Double])
381 , _pv_branches :: ![PhyloBranch]
382 , _pv_nodes :: ![PhyloNode]
383 , _pv_edges :: ![PhyloEdge]
384 } deriving (Generic, Show)
386 -- | A phyloview is made of PhyloBranches, edges and nodes
387 data PhyloBranch = PhyloBranch
388 { _pb_id :: !PhyloBranchId
390 , _pb_metrics :: !(Map Text [Double])
391 } deriving (Generic, Show)
393 data PhyloEdge = PhyloEdge
394 { _pe_source :: !PhyloGroupId
395 , _pe_target :: !PhyloGroupId
396 , _pe_type :: !EdgeType
397 , _pe_weight :: !Weight
398 } deriving (Generic, Show)
400 data PhyloNode = PhyloNode
401 { _pn_id :: !PhyloGroupId
402 , _pn_bid :: !(Maybe PhyloBranchId)
405 , _pn_ngrams :: !(Maybe [Ngrams])
406 , _pn_metrics :: !(Map Text [Double])
407 , _pn_cooc :: !(Map (Int,Int) Double)
408 , _pn_parents :: !(Maybe [PhyloGroupId])
409 , _pn_childs :: ![PhyloNode]
410 } deriving (Generic, Show)
412 ------------------------
413 -- | PhyloQueryView | --
414 ------------------------
417 data ExportMode = Json | Dot | Svg
418 deriving (Generic, Show, Read)
419 data DisplayMode = Flat | Nested
420 deriving (Generic, Show, Read)
422 -- | A PhyloQueryView describes a Phylo as an output view
423 data PhyloQueryView = PhyloQueryView
426 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
427 , _qv_filiation :: !Filiation
429 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
430 , _qv_levelChilds :: !Bool
431 , _qv_levelChildsDepth :: !Level
433 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
434 -- Firstly the metrics, then the filters and the taggers
435 , _qv_metrics :: ![Metric]
436 , _qv_filters :: ![Filter]
437 , _qv_taggers :: ![Tagger]
439 -- An asc or desc sort to apply to the PhyloGraph
440 , _qv_sort :: !(Maybe (Sort,Order))
442 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
443 , _qv_export :: !ExportMode
444 , _qv_display :: !DisplayMode
445 , _qv_verbose :: !Bool
454 makeLenses ''PhyloParam
455 makeLenses ''Software
458 makeLenses ''PhyloFoundations
459 makeLenses ''PhyloGroup
460 makeLenses ''PhyloLevel
461 makeLenses ''PhyloPeriod
462 makeLenses ''PhyloFis
464 makeLenses ''Proximity
468 makeLenses ''PhyloQueryBuild
469 makeLenses ''PhyloQueryView
471 makeLenses ''PhyloView
472 makeLenses ''PhyloBranch
473 makeLenses ''PhyloNode
474 makeLenses ''PhyloEdge
477 ------------------------
478 -- | JSON instances | --
479 ------------------------
482 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
483 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
484 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
485 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
486 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
487 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
489 $(deriveJSON (unPrefix "_software_" ) ''Software )
490 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
492 $(deriveJSON defaultOptions ''Filter )
493 $(deriveJSON defaultOptions ''Metric )
494 $(deriveJSON defaultOptions ''Cluster )
495 $(deriveJSON defaultOptions ''Proximity )
497 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
498 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
499 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
500 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
501 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
503 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
504 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
506 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
507 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
508 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
509 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
510 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
512 $(deriveJSON defaultOptions ''Filiation )
513 $(deriveJSON defaultOptions ''EdgeType )
515 ---------------------------
516 -- | Swagger instances | --
517 ---------------------------
519 instance ToSchema Phylo where
520 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
521 instance ToSchema PhyloFoundations where
522 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
523 instance ToSchema PhyloPeriod where
524 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
525 instance ToSchema PhyloLevel where
526 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
527 instance ToSchema PhyloGroup where
528 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
529 instance ToSchema PhyloFis where
530 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
531 instance ToSchema Software where
532 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
533 instance ToSchema PhyloParam where
534 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
535 instance ToSchema Filter
536 instance ToSchema Metric
537 instance ToSchema Cluster
538 instance ToSchema Proximity where
539 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
540 instance ToSchema FisParams where
541 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
542 instance ToSchema HammingParams where
543 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
544 instance ToSchema LouvainParams where
545 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
546 instance ToSchema RCParams where
547 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
548 instance ToSchema WLJParams where
549 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
550 instance ToSchema LBParams where
551 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
552 instance ToSchema SBParams where
553 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
554 instance ToSchema PhyloQueryBuild where
555 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
556 instance ToSchema PhyloView where
557 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
558 instance ToSchema PhyloBranch where
559 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
560 instance ToSchema PhyloEdge where
561 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
562 instance ToSchema PhyloNode where
563 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
564 instance ToSchema Filiation
565 instance ToSchema EdgeType
567 ----------------------------
568 -- | TODO XML instances | --
569 ----------------------------