2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.DeepSeq
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
36 import Data.Maybe (Maybe)
39 import Data.Text (Text)
40 import Data.Vector (Vector)
41 import GHC.Generics (Generic)
42 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
43 import Gargantext.Prelude
44 import Gargantext.Text.Context (TermList)
45 import Prelude (Bounded)
52 -- | Global parameters of a Phylo
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
60 -- | Software parameters
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
82 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
83 , _phylo_param :: PhyloParam
85 deriving (Generic, Show, Eq)
88 -- | The foundations of a phylomemy created from a given TermList
89 data PhyloFoundations =
90 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
91 , _phylo_foundationsTermsList :: TermList
92 } deriving (Generic, Show, Eq)
95 -- | Date : a simple Integer
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
117 deriving (Generic, Show, Eq)
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
135 deriving (Generic, Show, Eq)
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupNgramsMeta :: Map Text [Double]
155 , _phylo_groupMeta :: Map Text Double
156 , _phylo_groupBranchId :: Maybe PhyloBranchId
157 , _phylo_groupCooc :: !(Map (Int,Int) Double)
159 , _phylo_groupPeriodParents :: [Pointer]
160 , _phylo_groupPeriodChilds :: [Pointer]
162 , _phylo_groupLevelParents :: [Pointer]
163 , _phylo_groupLevelChilds :: [Pointer]
165 deriving (Generic, NFData, Show, Eq, Ord)
167 -- instance NFData PhyloGroup
170 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
172 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
176 type PhyloPeriodId = (Start, End)
177 type PhyloLevelId = (PhyloPeriodId, Level)
178 type PhyloGroupId = (PhyloLevelId, Index)
179 type PhyloBranchId = (Level, Index)
182 -- | Weight : A generic mesure that can be associated with an Id
184 -- | Pointer : A weighted linked with a given PhyloGroup
185 type Pointer = (PhyloGroupId, Weight)
186 -- | Ngrams : a contiguous sequence of n terms
195 -- | Document : a piece of Text linked to a Date
196 data Document = Document
199 } deriving (Show,Generic,NFData)
201 -- | Clique : Set of ngrams cooccurring in the same Document
202 type Clique = Set Ngrams
203 -- | Support : Number of Documents where a Clique occurs
205 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
206 data PhyloFis = PhyloFis
207 { _phyloFis_clique :: Clique
208 , _phyloFis_support :: Support
209 , _phyloFis_period :: (Date,Date)
210 } deriving (Generic,NFData,Show,Eq)
212 -- | A list of clustered PhyloGroup
213 type PhyloCluster = [PhyloGroup]
216 -- | A PhyloGroup in a Graph
217 type GroupNode = PhyloGroup
218 -- | A weighted links between two PhyloGroups in a Graph
219 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
220 -- | The association as a Graph between a list of Nodes and a list of Edges
221 type GroupGraph = ([GroupNode],[GroupEdge])
229 data PhyloError = LevelDoesNotExist
239 -- | Cluster constructors
240 data Cluster = Fis FisParams
241 | RelatedComponents RCParams
242 | Louvain LouvainParams
243 deriving (Generic, Show, Eq, Read)
245 -- | Parameters for Fis clustering
246 data FisParams = FisParams
247 { _fis_keepMinorFis :: Bool
248 , _fis_minSupport :: Support
249 , _fis_minSize :: Int
250 } deriving (Generic, Show, Eq, Read)
252 -- | Parameters for RelatedComponents clustering
253 data RCParams = RCParams
254 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
256 -- | Parameters for Louvain clustering
257 data LouvainParams = LouvainParams
258 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
266 -- | Proximity constructors
267 data Proximity = WeightedLogJaccard WLJParams
268 | Hamming HammingParams
270 deriving (Generic, Show, Eq, Read)
272 -- | Parameters for WeightedLogJaccard proximity
273 data WLJParams = WLJParams
274 { _wlj_threshold :: Double
275 , _wlj_sensibility :: Double
276 } deriving (Generic, Show, Eq, Read)
278 -- | Parameters for Hamming proximity
279 data HammingParams = HammingParams
280 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
288 -- | Filter constructors
289 data Filter = LonelyBranch LBParams
290 | SizeBranch SBParams
291 deriving (Generic, Show, Eq)
293 -- | Parameters for LonelyBranch filter
294 data LBParams = LBParams
295 { _lb_periodsInf :: Int
296 , _lb_periodsSup :: Int
297 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
299 -- | Parameters for SizeBranch filter
300 data SBParams = SBParams
301 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
309 -- | Metric constructors
310 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
318 -- | Tagger constructors
319 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
320 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
328 -- | Sort constructors
329 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
330 data Order = Asc | Desc deriving (Generic, Show, Read)
338 -- | A Phyloquery describes a phylomemic reconstruction
339 data PhyloQueryBuild = PhyloQueryBuild
340 { _q_phyloTitle :: Text
341 , _q_phyloDesc :: Text
343 -- Grain and Steps for the PhyloPeriods
344 , _q_periodGrain :: Int
345 , _q_periodSteps :: Int
347 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
348 , _q_contextualUnit :: Cluster
349 , _q_contextualUnitMetrics :: [Metric]
350 , _q_contextualUnitFilters :: [Filter]
352 -- Inter-temporal matching method of the Phylo
353 , _q_interTemporalMatching :: Proximity
354 , _q_interTemporalMatchingFrame :: Int
355 , _q_interTemporalMatchingFrameTh :: Double
357 , _q_reBranchThr :: Double
358 , _q_reBranchNth :: Int
360 -- Last level of reconstruction
361 , _q_nthLevel :: Level
362 -- Clustering method used from level 1 to nthLevel
363 , _q_nthCluster :: Cluster
364 } deriving (Generic, Show, Eq)
366 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
367 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
368 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
375 -- | A PhyloView is the output type of a Phylo
376 data PhyloView = PhyloView
377 { _pv_param :: PhyloParam
379 , _pv_description :: Text
380 , _pv_filiation :: Filiation
382 , _pv_periods :: [PhyloPeriodId]
383 , _pv_metrics :: Map Text [Double]
384 , _pv_branches :: [PhyloBranch]
385 , _pv_nodes :: [PhyloNode]
386 , _pv_edges :: [PhyloEdge]
387 } deriving (Generic, Show)
389 -- | A phyloview is made of PhyloBranches, edges and nodes
390 data PhyloBranch = PhyloBranch
391 { _pb_id :: PhyloBranchId
393 , _pb_metrics :: Map Text [Double]
394 } deriving (Generic, Show)
396 data PhyloEdge = PhyloEdge
397 { _pe_source :: PhyloGroupId
398 , _pe_target :: PhyloGroupId
399 , _pe_type :: EdgeType
400 , _pe_weight :: Weight
401 } deriving (Generic, Show)
403 data PhyloNode = PhyloNode
404 { _pn_id :: PhyloGroupId
405 , _pn_bid :: Maybe PhyloBranchId
408 , _pn_ngrams :: Maybe [Ngrams]
409 , _pn_metrics :: Map Text [Double]
410 , _pn_cooc :: Map (Int,Int) Double
411 , _pn_parents :: Maybe [PhyloGroupId]
412 , _pn_childs :: [PhyloNode]
413 } deriving (Generic, Show)
415 ------------------------
416 -- | PhyloQueryView | --
417 ------------------------
420 data ExportMode = Json | Dot | Svg
421 deriving (Generic, Show, Read)
422 data DisplayMode = Flat | Nested
423 deriving (Generic, Show, Read)
425 -- | A PhyloQueryView describes a Phylo as an output view
426 data PhyloQueryView = PhyloQueryView
429 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
430 , _qv_filiation :: Filiation
432 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
433 , _qv_levelChilds :: Bool
434 , _qv_levelChildsDepth :: Level
436 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
437 -- Firstly the metrics, then the filters and the taggers
438 , _qv_metrics :: [Metric]
439 , _qv_filters :: [Filter]
440 , _qv_taggers :: [Tagger]
442 -- An asc or desc sort to apply to the PhyloGraph
443 , _qv_sort :: Maybe (Sort,Order)
445 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
446 , _qv_export :: ExportMode
447 , _qv_display :: DisplayMode
448 , _qv_verbose :: Bool
457 makeLenses ''PhyloParam
458 makeLenses ''Software
461 makeLenses ''PhyloFoundations
462 makeLenses ''PhyloGroup
463 makeLenses ''PhyloLevel
464 makeLenses ''PhyloPeriod
465 makeLenses ''PhyloFis
467 makeLenses ''Proximity
471 makeLenses ''PhyloQueryBuild
472 makeLenses ''PhyloQueryView
474 makeLenses ''PhyloView
475 makeLenses ''PhyloBranch
476 makeLenses ''PhyloNode
477 makeLenses ''PhyloEdge
480 ------------------------
481 -- | JSON instances | --
482 ------------------------
485 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
486 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
487 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
488 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
489 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
490 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
492 $(deriveJSON (unPrefix "_software_" ) ''Software )
493 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
495 $(deriveJSON defaultOptions ''Filter )
496 $(deriveJSON defaultOptions ''Metric )
497 $(deriveJSON defaultOptions ''Cluster )
498 $(deriveJSON defaultOptions ''Proximity )
500 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
501 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
502 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
503 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
504 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
506 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
507 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
509 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
510 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
511 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
512 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
513 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
515 $(deriveJSON defaultOptions ''Filiation )
516 $(deriveJSON defaultOptions ''EdgeType )
518 ---------------------------
519 -- | Swagger instances | --
520 ---------------------------
522 instance ToSchema Phylo where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
524 instance ToSchema PhyloFoundations where
525 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
526 instance ToSchema PhyloPeriod where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
528 instance ToSchema PhyloLevel where
529 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
530 instance ToSchema PhyloGroup where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
532 instance ToSchema PhyloFis where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
534 instance ToSchema Software where
535 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
536 instance ToSchema PhyloParam where
537 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
538 instance ToSchema Filter
539 instance ToSchema Metric
540 instance ToSchema Cluster
541 instance ToSchema Proximity where
542 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
543 instance ToSchema FisParams where
544 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
545 instance ToSchema HammingParams where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
547 instance ToSchema LouvainParams where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
549 instance ToSchema RCParams where
550 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
551 instance ToSchema WLJParams where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
553 instance ToSchema LBParams where
554 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
555 instance ToSchema SBParams where
556 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
557 instance ToSchema PhyloQueryBuild where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
559 instance ToSchema PhyloView where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
561 instance ToSchema PhyloBranch where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
563 instance ToSchema PhyloEdge where
564 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
565 instance ToSchema PhyloNode where
566 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
567 instance ToSchema Filiation
568 instance ToSchema EdgeType
570 ----------------------------
571 -- | TODO XML instances | --
572 ----------------------------