2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
41 --import Data.Time.Clock.POSIX (POSIXTime)
42 import GHC.Generics (Generic)
43 --import Gargantext.Database.Schema.Ngrams (NgramsId)
44 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
45 import Gargantext.Text.Context (TermList)
46 import Gargantext.Prelude
48 import Control.DeepSeq
55 -- | Global parameters of a Phylo
57 PhyloParam { _phyloParam_version :: Text -- Double ?
58 , _phyloParam_software :: Software
59 , _phyloParam_query :: PhyloQueryBuild
60 } deriving (Generic, Show, Eq)
63 -- | Software parameters
65 Software { _software_name :: Text
66 , _software_version :: Text
67 } deriving (Generic, Show, Eq)
75 -- | Phylo datatype of a phylomemy
76 -- Duration : time Segment of the whole Phylo
77 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
78 -- Periods : list of all the periods of a Phylo
80 Phylo { _phylo_duration :: (Start, End)
81 , _phylo_foundations :: PhyloFoundations
82 , _phylo_periods :: [PhyloPeriod]
83 , _phylo_docsByYears :: Map Date Double
84 , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
85 , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
86 , _phylo_param :: PhyloParam
88 deriving (Generic, Show, Eq)
91 -- | The foundations of a phylomemy created from a given TermList
92 data PhyloFoundations =
93 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
94 , _phylo_foundationsTermsList :: TermList
95 } deriving (Generic, Show, Eq)
98 -- | Date : a simple Integer
101 -- | UTCTime in seconds since UNIX epoch
102 -- type Start = POSIXTime
103 -- type End = POSIXTime
108 ---------------------
109 -- | PhyloPeriod | --
110 ---------------------
113 -- | PhyloStep : steps of phylomemy on temporal axis
114 -- Period: tuple (start date, end date) of the step of the phylomemy
115 -- Levels: levels of granularity
117 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
118 , _phylo_periodLevels :: [PhyloLevel]
120 deriving (Generic, Show, Eq)
128 -- | PhyloLevel : levels of phylomemy on level axis
129 -- Levels description:
130 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
131 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
132 -- Level 1: First level of clustering
133 -- Level N: Nth level of clustering
135 PhyloLevel { _phylo_levelId :: PhyloLevelId
136 , _phylo_levelGroups :: [PhyloGroup]
138 deriving (Generic, Show, Eq)
146 -- | PhyloGroup : group of ngrams at each level and step
147 -- Label : maybe has a label as text
148 -- Ngrams: set of terms that build the group
149 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
150 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
151 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
152 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
154 PhyloGroup { _phylo_groupId :: PhyloGroupId
155 , _phylo_groupLabel :: Text
156 , _phylo_groupNgrams :: [Int]
157 , _phylo_groupNgramsMeta :: Map Text [Double]
158 , _phylo_groupMeta :: Map Text Double
159 , _phylo_groupBranchId :: Maybe PhyloBranchId
160 , _phylo_groupCooc :: !(Map (Int,Int) Double)
162 , _phylo_groupPeriodParents :: [Pointer]
163 , _phylo_groupPeriodChilds :: [Pointer]
165 , _phylo_groupLevelParents :: [Pointer]
166 , _phylo_groupLevelChilds :: [Pointer]
168 deriving (Generic, NFData, Show, Eq, Ord)
170 -- instance NFData PhyloGroup
173 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
175 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
179 type PhyloPeriodId = (Start, End)
180 type PhyloLevelId = (PhyloPeriodId, Level)
181 type PhyloGroupId = (PhyloLevelId, Index)
182 type PhyloBranchId = (Level, Index)
185 -- | Weight : A generic mesure that can be associated with an Id
187 -- | Pointer : A weighted linked with a given PhyloGroup
188 type Pointer = (PhyloGroupId, Weight)
189 -- | Ngrams : a contiguous sequence of n terms
198 -- | Document : a piece of Text linked to a Date
199 data Document = Document
202 } deriving (Show,Generic,NFData)
204 -- | Clique : Set of ngrams cooccurring in the same Document
205 type Clique = Set Ngrams
206 -- | Support : Number of Documents where a Clique occurs
208 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
209 data PhyloFis = PhyloFis
210 { _phyloFis_clique :: Clique
211 , _phyloFis_support :: Support
212 , _phyloFis_period :: (Date,Date)
213 } deriving (Generic,NFData,Show,Eq)
215 -- | A list of clustered PhyloGroup
216 type PhyloCluster = [PhyloGroup]
219 -- | A PhyloGroup in a Graph
220 type GroupNode = PhyloGroup
221 -- | A weighted links between two PhyloGroups in a Graph
222 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
223 -- | The association as a Graph between a list of Nodes and a list of Edges
224 type GroupGraph = ([GroupNode],[GroupEdge])
232 data PhyloError = LevelDoesNotExist
242 -- | Cluster constructors
243 data Cluster = Fis FisParams
244 | RelatedComponents RCParams
245 | Louvain LouvainParams
246 deriving (Generic, Show, Eq, Read)
248 -- | Parameters for Fis clustering
249 data FisParams = FisParams
250 { _fis_keepMinorFis :: Bool
251 , _fis_minSupport :: Support
252 , _fis_minSize :: Int
253 } deriving (Generic, Show, Eq, Read)
255 -- | Parameters for RelatedComponents clustering
256 data RCParams = RCParams
257 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
259 -- | Parameters for Louvain clustering
260 data LouvainParams = LouvainParams
261 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
269 -- | Proximity constructors
270 data Proximity = WeightedLogJaccard WLJParams
271 | Hamming HammingParams
273 deriving (Generic, Show, Eq, Read)
275 -- | Parameters for WeightedLogJaccard proximity
276 data WLJParams = WLJParams
277 { _wlj_threshold :: Double
278 , _wlj_sensibility :: Double
279 } deriving (Generic, Show, Eq, Read)
281 -- | Parameters for Hamming proximity
282 data HammingParams = HammingParams
283 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
291 -- | Filter constructors
292 data Filter = LonelyBranch LBParams
293 | SizeBranch SBParams
294 deriving (Generic, Show, Eq)
296 -- | Parameters for LonelyBranch filter
297 data LBParams = LBParams
298 { _lb_periodsInf :: Int
299 , _lb_periodsSup :: Int
300 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
302 -- | Parameters for SizeBranch filter
303 data SBParams = SBParams
304 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
312 -- | Metric constructors
313 data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
321 -- | Tagger constructors
322 data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
323 | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
331 -- | Sort constructors
332 data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
333 data Order = Asc | Desc deriving (Generic, Show, Read)
341 -- | A Phyloquery describes a phylomemic reconstruction
342 data PhyloQueryBuild = PhyloQueryBuild
343 { _q_phyloTitle :: Text
344 , _q_phyloDesc :: Text
346 -- Grain and Steps for the PhyloPeriods
347 , _q_periodGrain :: Int
348 , _q_periodSteps :: Int
350 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
351 , _q_contextualUnit :: Cluster
352 , _q_contextualUnitMetrics :: [Metric]
353 , _q_contextualUnitFilters :: [Filter]
355 -- Inter-temporal matching method of the Phylo
356 , _q_interTemporalMatching :: Proximity
357 , _q_interTemporalMatchingFrame :: Int
358 , _q_interTemporalMatchingFrameTh :: Double
360 , _q_reBranchThr :: Double
361 , _q_reBranchNth :: Int
363 -- Last level of reconstruction
364 , _q_nthLevel :: Level
365 -- Clustering method used from level 1 to nthLevel
366 , _q_nthCluster :: Cluster
367 } deriving (Generic, Show, Eq)
369 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
370 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
371 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
378 -- | A PhyloView is the output type of a Phylo
379 data PhyloView = PhyloView
380 { _pv_param :: PhyloParam
382 , _pv_description :: Text
383 , _pv_filiation :: Filiation
385 , _pv_periods :: [PhyloPeriodId]
386 , _pv_metrics :: Map Text [Double]
387 , _pv_branches :: [PhyloBranch]
388 , _pv_nodes :: [PhyloNode]
389 , _pv_edges :: [PhyloEdge]
390 } deriving (Generic, Show)
392 -- | A phyloview is made of PhyloBranches, edges and nodes
393 data PhyloBranch = PhyloBranch
394 { _pb_id :: PhyloBranchId
396 , _pb_metrics :: Map Text [Double]
397 } deriving (Generic, Show)
399 data PhyloEdge = PhyloEdge
400 { _pe_source :: PhyloGroupId
401 , _pe_target :: PhyloGroupId
402 , _pe_type :: EdgeType
403 , _pe_weight :: Weight
404 } deriving (Generic, Show)
406 data PhyloNode = PhyloNode
407 { _pn_id :: PhyloGroupId
408 , _pn_bid :: Maybe PhyloBranchId
411 , _pn_ngrams :: Maybe [Ngrams]
412 , _pn_metrics :: Map Text [Double]
413 , _pn_cooc :: Map (Int,Int) Double
414 , _pn_parents :: Maybe [PhyloGroupId]
415 , _pn_childs :: [PhyloNode]
416 } deriving (Generic, Show)
418 ------------------------
419 -- | PhyloQueryView | --
420 ------------------------
423 data ExportMode = Json | Dot | Svg
424 deriving (Generic, Show, Read)
425 data DisplayMode = Flat | Nested
426 deriving (Generic, Show, Read)
428 -- | A PhyloQueryView describes a Phylo as an output view
429 data PhyloQueryView = PhyloQueryView
432 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
433 , _qv_filiation :: Filiation
435 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
436 , _qv_levelChilds :: Bool
437 , _qv_levelChildsDepth :: Level
439 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
440 -- Firstly the metrics, then the filters and the taggers
441 , _qv_metrics :: [Metric]
442 , _qv_filters :: [Filter]
443 , _qv_taggers :: [Tagger]
445 -- An asc or desc sort to apply to the PhyloGraph
446 , _qv_sort :: Maybe (Sort,Order)
448 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
449 , _qv_export :: ExportMode
450 , _qv_display :: DisplayMode
451 , _qv_verbose :: Bool
460 makeLenses ''PhyloParam
461 makeLenses ''Software
464 makeLenses ''PhyloFoundations
465 makeLenses ''PhyloGroup
466 makeLenses ''PhyloLevel
467 makeLenses ''PhyloPeriod
468 makeLenses ''PhyloFis
470 makeLenses ''Proximity
474 makeLenses ''PhyloQueryBuild
475 makeLenses ''PhyloQueryView
477 makeLenses ''PhyloView
478 makeLenses ''PhyloBranch
479 makeLenses ''PhyloNode
480 makeLenses ''PhyloEdge
483 ------------------------
484 -- | JSON instances | --
485 ------------------------
488 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
489 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
490 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
491 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
492 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
493 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
495 $(deriveJSON (unPrefix "_software_" ) ''Software )
496 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
498 $(deriveJSON defaultOptions ''Filter )
499 $(deriveJSON defaultOptions ''Metric )
500 $(deriveJSON defaultOptions ''Cluster )
501 $(deriveJSON defaultOptions ''Proximity )
503 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
504 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
505 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
506 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
507 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
509 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
510 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
512 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
513 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
514 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
515 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
516 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
518 $(deriveJSON defaultOptions ''Filiation )
519 $(deriveJSON defaultOptions ''EdgeType )
521 ---------------------------
522 -- | Swagger instances | --
523 ---------------------------
525 instance ToSchema Phylo where
526 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
527 instance ToSchema PhyloFoundations where
528 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
529 instance ToSchema PhyloPeriod where
530 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
531 instance ToSchema PhyloLevel where
532 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
533 instance ToSchema PhyloGroup where
534 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
535 instance ToSchema PhyloFis where
536 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
537 instance ToSchema Software where
538 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
539 instance ToSchema PhyloParam where
540 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
541 instance ToSchema Filter
542 instance ToSchema Metric
543 instance ToSchema Cluster
544 instance ToSchema Proximity where
545 declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
546 instance ToSchema FisParams where
547 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
548 instance ToSchema HammingParams where
549 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
550 instance ToSchema LouvainParams where
551 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
552 instance ToSchema RCParams where
553 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
554 instance ToSchema WLJParams where
555 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
556 instance ToSchema LBParams where
557 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
558 instance ToSchema SBParams where
559 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
560 instance ToSchema PhyloQueryBuild where
561 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
562 instance ToSchema PhyloView where
563 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
564 instance ToSchema PhyloBranch where
565 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
566 instance ToSchema PhyloEdge where
567 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
568 instance ToSchema PhyloNode where
569 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
570 instance ToSchema Filiation
571 instance ToSchema EdgeType
573 ----------------------------
574 -- | TODO XML instances | --
575 ----------------------------