2 Module : Gargantext.Core.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveAnyClass #-}
26 {-# LANGUAGE TemplateHaskell #-}
28 module Gargantext.Core.Viz.Phylo where
30 import Control.DeepSeq
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON,defaultOptions)
36 import Data.Text (Text)
37 import Data.Vector (Vector)
38 import GHC.Generics (Generic)
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Prelude
41 import Gargantext.Core.Text.Context (TermList)
48 -- | Global parameters of a Phylo
50 PhyloParam { _phyloParam_version :: Text -- Double ?
51 , _phyloParam_software :: Software
52 , _phyloParam_query :: PhyloQueryBuild
53 } deriving (Generic, Show, Eq)
56 -- | Software parameters
58 Software { _software_name :: Text
59 , _software_version :: Text
60 } deriving (Generic, Show, Eq)
68 -- | Phylo datatype of a phylomemy
69 -- Duration : time Segment of the whole Phylo
70 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
71 -- Periods : list of all the periods of a Phylo
73 Phylo { _phylo_duration :: (Start, End)
74 , _phylo_foundations :: PhyloFoundations
75 , _phylo_periods :: [PhyloPeriod]
76 , _phylo_docsByYears :: Map Date Double
77 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
78 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
79 , _phylo_param :: PhyloParam
81 deriving (Generic, Show, Eq)
84 -- | The foundations of a phylomemy created from a given TermList
85 data PhyloFoundations =
86 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
87 , _phylo_foundationsTermsList :: TermList
88 } deriving (Generic, Show, Eq)
91 -- | Date : a simple Integer
94 -- | UTCTime in seconds since UNIX epoch
95 -- type Start = POSIXTime
96 -- type End = POSIXTime
101 ---------------------
102 -- | PhyloPeriod | --
103 ---------------------
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
110 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
111 , _phylo_periodLevels :: [PhyloLevel]
113 deriving (Generic, Show, Eq)
121 -- | PhyloLevel : levels of phylomemy on level axis
122 -- Levels description:
123 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
124 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
125 -- Level 1: First level of clustering
126 -- Level N: Nth level of clustering
128 PhyloLevel { _phylo_levelId :: PhyloLevelId
129 , _phylo_levelGroups :: [PhyloGroup]
131 deriving (Generic, Show, Eq)
139 -- | PhyloGroup : group of ngrams at each level and step
140 -- Label : maybe has a label as text
141 -- Ngrams: set of terms that build the group
142 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
143 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
144 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
145 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
147 PhyloGroup { _phylo_groupId :: PhyloGroupId
148 , _phylo_groupLabel :: Text
149 , _phylo_groupNgrams :: [Int]
150 , _phylo_groupNgramsMeta :: Map Text [Double]
151 , _phylo_groupMeta :: Map Text Double
152 , _phylo_groupBranchId :: Maybe PhyloBranchId
153 , _phylo_groupCooc :: !(Map (Int,Int) Double)
155 , _phylo_groupPeriodParents :: [Pointer]
156 , _phylo_groupPeriodChilds :: [Pointer]
158 , _phylo_groupLevelParents :: [Pointer]
159 , _phylo_groupLevelChilds :: [Pointer]
161 deriving (Generic, NFData, Show, Eq, Ord)
163 -- instance NFData PhyloGroup
166 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
172 type PhyloPeriodId = (Start, End)
173 type PhyloLevelId = (PhyloPeriodId, Level)
174 type PhyloGroupId = (PhyloLevelId, Index)
175 type PhyloBranchId = (Level, Index)
178 -- | Weight : A generic mesure that can be associated with an Id
180 -- | Pointer : A weighted linked with a given PhyloGroup
181 type Pointer = (PhyloGroupId, Weight)
182 -- | Ngrams : a contiguous sequence of n terms
191 -- | Document : a piece of Text linked to a Date
192 data Document = Document
195 } deriving (Show,Generic,NFData)
197 -- | Clique : Set of ngrams cooccurring in the same Document
198 type Clique = Set Ngrams
199 -- | Support : Number of Documents where a Clique occurs
201 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
202 data PhyloFis = PhyloFis
203 { _phyloFis_clique :: Clique
204 , _phyloFis_support :: Support
205 , _phyloFis_period :: (Date,Date)
206 } deriving (Generic,NFData,Show,Eq)
208 -- | A list of clustered PhyloGroup
209 type PhyloCluster = [PhyloGroup]
212 -- | A PhyloGroup in a Graph
213 type GroupNode = PhyloGroup
214 -- | A weighted links between two PhyloGroups in a Graph
215 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
216 -- | The association as a Graph between a list of Nodes and a list of Edges
217 type GroupGraph = ([GroupNode],[GroupEdge])
225 data PhyloError = LevelDoesNotExist
235 -- | Cluster constructors
236 data Cluster = Fis FisParams
237 | RelatedComponents RCParams
238 | Louvain LouvainParams
239 deriving (Generic, Show, Eq, Read)
241 -- | Parameters for Fis clustering
242 data FisParams = FisParams
243 { _fis_keepMinorFis :: Bool
244 , _fis_minSupport :: Support
245 , _fis_minSize :: Int
246 } deriving (Generic, Show, Eq, Read)
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
266 deriving (Generic, Show, Eq, Read)
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq, Read)
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
284 -- | Filter constructors
285 data Filter = LonelyBranch LBParams
286 | SizeBranch SBParams
287 deriving (Generic, Show, Eq)
289 -- | Parameters for LonelyBranch filter
290 data LBParams = LBParams
291 { _lb_periodsInf :: Int
292 , _lb_periodsSup :: Int
293 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
295 -- | Parameters for SizeBranch filter
296 data SBParams = SBParams
297 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
305 -- | Metric constructors
306 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
314 -- | Tagger constructors
315 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
316 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
324 -- | Sort constructors
325 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
326 data Order = Asc | Desc deriving (Generic, Show, Read)
334 -- | A Phyloquery describes a phylomemic reconstruction
335 data PhyloQueryBuild = PhyloQueryBuild
336 { _q_phyloTitle :: Text
337 , _q_phyloDesc :: Text
339 -- Grain and Steps for the PhyloPeriods
340 , _q_periodGrain :: Int
341 , _q_periodSteps :: Int
343 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
344 , _q_contextualUnit :: Cluster
345 , _q_contextualUnitMetrics :: [Metric]
346 , _q_contextualUnitFilters :: [Filter]
348 -- Inter-temporal matching method of the Phylo
349 , _q_interTemporalMatching :: Proximity
350 , _q_interTemporalMatchingFrame :: Int
351 , _q_interTemporalMatchingFrameTh :: Double
353 , _q_reBranchThr :: Double
354 , _q_reBranchNth :: Int
356 -- Last level of reconstruction
357 , _q_nthLevel :: Level
358 -- Clustering method used from level 1 to nthLevel
359 , _q_nthCluster :: Cluster
360 } deriving (Generic, Show, Eq)
362 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
363 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
364 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
371 -- | A PhyloView is the output type of a Phylo
372 data PhyloView = PhyloView
373 { _pv_param :: PhyloParam
375 , _pv_description :: Text
376 , _pv_filiation :: Filiation
378 , _pv_periods :: [PhyloPeriodId]
379 , _pv_metrics :: Map Text [Double]
380 , _pv_branches :: [PhyloBranch]
381 , _pv_nodes :: [PhyloNode]
382 , _pv_edges :: [PhyloEdge]
383 } deriving (Generic, Show)
385 -- | A phyloview is made of PhyloBranches, edges and nodes
386 data PhyloBranch = PhyloBranch
387 { _pb_id :: PhyloBranchId
389 , _pb_metrics :: Map Text [Double]
390 } deriving (Generic, Show)
392 data PhyloEdge = PhyloEdge
393 { _pe_source :: PhyloGroupId
394 , _pe_target :: PhyloGroupId
395 , _pe_type :: EdgeType
396 , _pe_weight :: Weight
397 } deriving (Generic, Show)
399 data PhyloNode = PhyloNode
400 { _pn_id :: PhyloGroupId
401 , _pn_bid :: Maybe PhyloBranchId
404 , _pn_ngrams :: Maybe [Ngrams]
405 , _pn_metrics :: Map Text [Double]
406 , _pn_cooc :: Map (Int,Int) Double
407 , _pn_parents :: Maybe [PhyloGroupId]
408 , _pn_childs :: [PhyloNode]
409 } deriving (Generic, Show)
411 ------------------------
412 -- | PhyloQueryView | --
413 ------------------------
416 data ExportMode = Json | Dot | Svg
417 deriving (Generic, Show, Read)
418 data DisplayMode = Flat | Nested
419 deriving (Generic, Show, Read)
421 -- | A PhyloQueryView describes a Phylo as an output view
422 data PhyloQueryView = PhyloQueryView
425 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
426 , _qv_filiation :: Filiation
428 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
429 , _qv_levelChilds :: Bool
430 , _qv_levelChildsDepth :: Level
432 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
433 -- Firstly the metrics, then the filters and the taggers
434 , _qv_metrics :: [Metric]
435 , _qv_filters :: [Filter]
436 , _qv_taggers :: [Tagger]
438 -- An asc or desc sort to apply to the PhyloGraph
439 , _qv_sort :: Maybe (Sort,Order)
441 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
442 , _qv_export :: ExportMode
443 , _qv_display :: DisplayMode
444 , _qv_verbose :: Bool
453 makeLenses ''PhyloParam
454 makeLenses ''Software
457 makeLenses ''PhyloFoundations
458 makeLenses ''PhyloGroup
459 makeLenses ''PhyloLevel
460 makeLenses ''PhyloPeriod
461 makeLenses ''PhyloFis
463 makeLenses ''Proximity
467 makeLenses ''PhyloQueryBuild
468 makeLenses ''PhyloQueryView
470 makeLenses ''PhyloView
471 makeLenses ''PhyloBranch
472 makeLenses ''PhyloNode
473 makeLenses ''PhyloEdge
476 ------------------------
477 -- | JSON instances | --
478 ------------------------
481 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
482 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
483 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
484 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
485 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
486 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
488 $(deriveJSON (unPrefix "_software_" ) ''Software )
489 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
491 $(deriveJSON defaultOptions ''Filter )
492 $(deriveJSON defaultOptions ''Metric )
493 $(deriveJSON defaultOptions ''Cluster )
494 $(deriveJSON defaultOptions ''Proximity )
496 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
497 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
498 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
499 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
500 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
502 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
503 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
505 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
506 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
507 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
508 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
509 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
511 $(deriveJSON defaultOptions ''Filiation )
512 $(deriveJSON defaultOptions ''EdgeType )
514 ---------------------------
515 -- | Swagger instances | --
516 ---------------------------
518 instance ToSchema Phylo where
519 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
520 instance ToSchema PhyloFoundations where
521 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
522 instance ToSchema PhyloPeriod where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
524 instance ToSchema PhyloLevel where
525 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
526 instance ToSchema PhyloGroup where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
528 instance ToSchema PhyloFis where
529 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
530 instance ToSchema Software where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
532 instance ToSchema PhyloParam where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
534 instance ToSchema Filter
535 instance ToSchema Metric
536 instance ToSchema Cluster
537 instance ToSchema Proximity where
538 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
539 instance ToSchema FisParams where
540 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
541 instance ToSchema HammingParams where
542 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
543 instance ToSchema LouvainParams where
544 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
545 instance ToSchema RCParams where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
547 instance ToSchema WLJParams where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
549 instance ToSchema LBParams where
550 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
551 instance ToSchema SBParams where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
553 instance ToSchema PhyloQueryBuild where
554 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
555 instance ToSchema PhyloView where
556 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
557 instance ToSchema PhyloBranch where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
559 instance ToSchema PhyloEdge where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
561 instance ToSchema PhyloNode where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
563 instance ToSchema Filiation
564 instance ToSchema EdgeType
566 ----------------------------
567 -- | TODO XML instances | --
568 ----------------------------