2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scales of
granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
47 import Control.DeepSeq
-- | Global parameters of a Phylo
data PhyloParam =
     PhyloParam { _phyloParam_version  :: Text -- Double ?
                , _phyloParam_software :: Software         -- ^ see 'Software'
                , _phyloParam_query    :: PhyloQueryBuild  -- ^ see 'PhyloQueryBuild'
                } deriving (Generic, Show, Eq)
-- | Software parameters
data Software =
     Software { _software_name    :: Text
              , _software_version :: Text
              } deriving (Generic, Show, Eq)
-- | Phylo datatype of a phylomemy
--   Duration    : time segment of the whole Phylo
--   Foundations : vector of all the Ngrams contained in a Phylo (built from a list of actants)
--   Periods     : list of all the periods of a Phylo
data Phylo =
     Phylo { _phylo_duration    :: (Start, End)
           , _phylo_foundations :: PhyloFoundations
           , _phylo_periods     :: [PhyloPeriod]
           , _phylo_docsByYears :: Map Date Double
             -- The two maps below are the only fields declared strict.
           , _phylo_cooc        :: !(Map Date (Map (Int,Int) Double))
           , _phylo_fis         :: !(Map (Date,Date) [PhyloFis])
           , _phylo_param       :: PhyloParam
           }
           deriving (Generic, Show, Eq)
-- | The foundations of a phylomemy, created from a given 'TermList'.
data PhyloFoundations
  = PhyloFoundations
      { _phylo_foundationsRoots     :: Vector Ngrams
      , _phylo_foundationsTermsList :: TermList
      }
  deriving (Generic, Show, Eq)
97 -- | Date : a simple Integer
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
-- | PhyloStep : steps of phylomemy on temporal axis
--   Period : tuple (start date, end date) of the step of the phylomemy
--   Levels : levels of granularity
data PhyloPeriod =
     PhyloPeriod { _phylo_periodId     :: PhyloPeriodId
                 , _phylo_periodLevels :: [PhyloLevel]
                 }
                 deriving (Generic, Show, Eq)
-- | PhyloLevel : levels of phylomemy on level axis
--   Levels description:
--   Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
--   Level  0: Group of synonyms (by stems + by qualitative expert meaning)
--   Level  1: First level of clustering
--   Level  N: Nth level of clustering
data PhyloLevel =
     PhyloLevel { _phylo_levelId     :: PhyloLevelId
                , _phylo_levelGroups :: [PhyloGroup]
                }
                deriving (Generic, Show, Eq)
-- | PhyloGroup : group of ngrams at each level and step
--   Label   : maybe has a label as text
--   Ngrams  : set of terms that build the group
--   Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
--   Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
--   Level  Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
--   Pointers are directed link from Self to any PhyloGroup (/= Self ?)
data PhyloGroup =
     PhyloGroup { _phylo_groupId            :: PhyloGroupId
                , _phylo_groupLabel         :: Text
                , _phylo_groupNgrams        :: [Int]
                , _phylo_groupNgramsMeta    :: Map Text [Double]
                , _phylo_groupMeta          :: Map Text Double
                , _phylo_groupBranchId      :: Maybe PhyloBranchId
                  -- The only field of this record declared strict.
                , _phylo_groupCooc          :: !(Map (Int,Int) Double)

                , _phylo_groupPeriodParents :: [Pointer]
                , _phylo_groupPeriodChilds  :: [Pointer]

                , _phylo_groupLevelParents  :: [Pointer]
                , _phylo_groupLevelChilds   :: [Pointer]
                }
                -- NFData is derived through Generic (DeriveAnyClass).
                deriving (Generic, NFData, Show, Eq, Ord)
169 -- instance NFData PhyloGroup
172 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
174 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
-- | Identifier of a period: a ('Start', 'End') pair.
type PhyloPeriodId = (Start, End)

-- | Identifier of a level: a 'PhyloPeriodId' paired with a 'Level'.
type PhyloLevelId  = (PhyloPeriodId, Level)

-- | Identifier of a group: a 'PhyloLevelId' paired with an 'Index'.
type PhyloGroupId  = (PhyloLevelId, Index)

-- | Identifier of a branch: a 'Level' paired with an 'Index'.
type PhyloBranchId = (Level, Index)
-- | Weight : A generic measure that can be associated with an Id
-- | Pointer : a weighted link to a given PhyloGroup, stored as the
-- target's 'PhyloGroupId' together with a 'Weight'.
type Pointer = (PhyloGroupId, Weight)
188 -- | Ngrams : a contiguous sequence of n terms
197 -- | Document : a piece of Text linked to a Date
198 data Document = Document
201 } deriving (Show,Generic,NFData)
-- | Clique : a 'Set' of ngrams co-occurring in the same Document.
type Clique = Set Ngrams
205 -- | Support : Number of Documents where a Clique occurs
-- | Fis : Frequent Items Set, i.e. the association between a 'Clique'
-- and a 'Support', recorded together with a (Date, Date) period.
data PhyloFis
  = PhyloFis
      { _phyloFis_clique  :: Clique
      , _phyloFis_support :: Support
      , _phyloFis_period  :: (Date,Date)
      }
  deriving (Generic,NFData,Show,Eq)
-- | A cluster is a plain list of 'PhyloGroup's.
type PhyloCluster = [PhyloGroup]


-- | A node of a graph of groups: the 'PhyloGroup' itself.
type GroupNode  = PhyloGroup

-- | A weighted link between two PhyloGroups in a graph.
type GroupEdge  = ((PhyloGroup,PhyloGroup),Weight)

-- | A graph of groups: a list of nodes paired with a list of edges.
type GroupGraph = ([GroupNode],[GroupEdge])
-- | Errors that can be reported about a Phylo.
data PhyloError
  = LevelDoesNotExist
-- | Cluster constructors: each clustering method carries its own
-- parameter record.
data Cluster
  = Fis FisParams
  | RelatedComponents RCParams
  | Louvain LouvainParams
  deriving (Generic, Show, Eq, Read)
-- | Parameters for Fis clustering
data FisParams
  = FisParams
      { _fis_keepMinorFis :: Bool
      , _fis_minSupport   :: Support
      , _fis_minSize      :: Int
      }
  deriving (Generic, Show, Eq, Read)
-- | Parameters for RelatedComponents clustering
data RCParams
  = RCParams
      { _rc_proximity :: Proximity
      }
  deriving (Generic, Show, Eq, Read)
-- | Parameters for Louvain clustering
data LouvainParams
  = LouvainParams
      { _louvain_proximity :: Proximity
      }
  deriving (Generic, Show, Eq, Read)
-- | Proximity constructors: each proximity measure carries its own
-- parameter record.
data Proximity
  = WeightedLogJaccard WLJParams
  | Hamming HammingParams
  deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard proximity
data WLJParams
  = WLJParams
      { _wlj_threshold   :: Double
      , _wlj_sensibility :: Double
      }
  deriving (Generic, Show, Eq, Read)
-- | Parameters for Hamming proximity
data HammingParams
  = HammingParams
      { _hamming_threshold :: Double
      }
  deriving (Generic, Show, Eq, Read)
-- | Filter constructors: each filter carries its own parameter record.
data Filter
  = LonelyBranch LBParams
  | SizeBranch SBParams
  deriving (Generic, Show, Eq)
-- | Parameters for LonelyBranch filter
data LBParams
  = LBParams
      { _lb_periodsInf :: Int
      , _lb_periodsSup :: Int
      , _lb_minNodes   :: Int
      }
  deriving (Generic, Show, Eq)
-- | Parameters for SizeBranch filter
data SBParams
  = SBParams
      { _sb_minSize :: Int
      }
  deriving (Generic, Show, Eq)
-- | Metric constructors
data Metric
  = BranchAge
  | BranchBirth
  | BranchGroups
  deriving (Generic, Show, Eq, Read)
-- | Tagger constructors: three @BranchPeak*@ taggers and three
-- @GroupLabel*@ taggers.
data Tagger
  = BranchPeakFreq
  | BranchPeakCooc
  | BranchPeakInc
  | GroupLabelCooc
  | GroupLabelInc
  | GroupLabelIncDyn
  deriving (Show,Generic,Read)
-- | Sort constructors
data Sort
  = ByBranchAge
  | ByBranchBirth
  deriving (Generic, Show, Read, Enum, Bounded)
-- | Direction of a sort: ascending or descending.
data Order
  = Asc
  | Desc
  deriving (Generic, Show, Read)
-- | A Phyloquery describes a phylomemic reconstruction
data PhyloQueryBuild
  = PhyloQueryBuild
      { _q_phyloTitle :: Text
      , _q_phyloDesc  :: Text

        -- Grain and Steps for the PhyloPeriods
      , _q_periodGrain :: Int
      , _q_periodSteps :: Int

        -- Clustering method for building the contextual unit of Phylo (ie: level 1)
      , _q_contextualUnit        :: Cluster
      , _q_contextualUnitMetrics :: [Metric]
      , _q_contextualUnitFilters :: [Filter]

        -- Inter-temporal matching method of the Phylo
      , _q_interTemporalMatching        :: Proximity
      , _q_interTemporalMatchingFrame   :: Int
      , _q_interTemporalMatchingFrameTh :: Double

      , _q_reBranchThr :: Double
      , _q_reBranchNth :: Int

        -- Last level of reconstruction
      , _q_nthLevel :: Level
        -- Clustering method used from level 1 to nthLevel
      , _q_nthCluster :: Cluster
      }
  deriving (Generic, Show, Eq)
-- | To choose the Phylo edge you want to export : --> <-- <--> <=>
data Filiation
  = Ascendant
  | Descendant
  | Merge
  | Complete
  deriving (Generic, Show, Read)
-- | Kind of an edge: 'PeriodEdge' or 'LevelEdge'.
data EdgeType
  = PeriodEdge
  | LevelEdge
  deriving (Generic, Show, Eq)
-- | A PhyloView is the output type of a Phylo
data PhyloView
  = PhyloView
      { _pv_param       :: PhyloParam
      , _pv_description :: Text
      , _pv_filiation   :: Filiation
      , _pv_periods     :: [PhyloPeriodId]
      , _pv_metrics     :: Map Text [Double]
      , _pv_branches    :: [PhyloBranch]
      , _pv_nodes       :: [PhyloNode]
      , _pv_edges       :: [PhyloEdge]
      }
  deriving (Generic, Show)
-- | A phyloview is made of PhyloBranches, edges and nodes
data PhyloBranch
  = PhyloBranch
      { _pb_id      :: PhyloBranchId
      , _pb_metrics :: Map Text [Double]
      }
  deriving (Generic, Show)
-- | An edge of a 'PhyloView': source and target group ids, the kind of
-- edge and its weight.
data PhyloEdge
  = PhyloEdge
      { _pe_source :: PhyloGroupId
      , _pe_target :: PhyloGroupId
      , _pe_type   :: EdgeType
      , _pe_weight :: Weight
      }
  deriving (Generic, Show)
-- | A node of a 'PhyloView'. The type is recursive: '_pn_childs' holds
-- further 'PhyloNode's.
data PhyloNode
  = PhyloNode
      { _pn_id      :: PhyloGroupId
      , _pn_bid     :: Maybe PhyloBranchId
      , _pn_ngrams  :: Maybe [Ngrams]
      , _pn_metrics :: Map Text [Double]
      , _pn_cooc    :: Map (Int,Int) Double
      , _pn_parents :: Maybe [PhyloGroupId]
      , _pn_childs  :: [PhyloNode]
      }
  deriving (Generic, Show)
417 ------------------------
418 -- | PhyloQueryView | --
419 ------------------------
-- | Export formats selectable in a PhyloQueryView.
data ExportMode
  = Json
  | Dot
  | Svg
  deriving (Generic, Show, Read)
-- | Display modes selectable in a PhyloQueryView.
data DisplayMode
  = Flat
  | Nested
  deriving (Generic, Show, Read)
427 -- | A PhyloQueryView describes a Phylo as an output view
428 data PhyloQueryView = PhyloQueryView
-- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complete is redundant, and Merge (with the max)
432 , _qv_filiation :: Filiation
434 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
435 , _qv_levelChilds :: Bool
436 , _qv_levelChildsDepth :: Level
438 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
439 -- Firstly the metrics, then the filters and the taggers
440 , _qv_metrics :: [Metric]
441 , _qv_filters :: [Filter]
442 , _qv_taggers :: [Tagger]
444 -- An asc or desc sort to apply to the PhyloGraph
445 , _qv_sort :: Maybe (Sort,Order)
447 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
448 , _qv_export :: ExportMode
449 , _qv_display :: DisplayMode
450 , _qv_verbose :: Bool
-- Template Haskell: generate lenses for the underscore-prefixed record
-- fields of each type below. Written as explicit @$(...)@ splices, like
-- the JSON instances further down; the order of the splices is kept.
$(makeLenses ''PhyloParam)
$(makeLenses ''Software)

$(makeLenses ''PhyloFoundations)
$(makeLenses ''PhyloGroup)
$(makeLenses ''PhyloLevel)
$(makeLenses ''PhyloPeriod)
$(makeLenses ''PhyloFis)

-- NOTE(review): 'Proximity' declares no record fields, so this splice
-- presumably generates nothing; confirm whether it is needed.
$(makeLenses ''Proximity)

$(makeLenses ''PhyloQueryBuild)
$(makeLenses ''PhyloQueryView)

$(makeLenses ''PhyloView)
$(makeLenses ''PhyloBranch)
$(makeLenses ''PhyloNode)
$(makeLenses ''PhyloEdge)
482 ------------------------
483 -- | JSON instances | --
484 ------------------------
-- Core Phylo structure: each record passes its own field prefix to 'unPrefix'.
$(deriveJSON (unPrefix "_phylo_") ''Phylo)
$(deriveJSON (unPrefix "_phylo_foundations") ''PhyloFoundations)
$(deriveJSON (unPrefix "_phylo_period") ''PhyloPeriod)
$(deriveJSON (unPrefix "_phylo_level") ''PhyloLevel)
$(deriveJSON (unPrefix "_phylo_group") ''PhyloGroup)
$(deriveJSON (unPrefix "_phyloFis_") ''PhyloFis)

-- Global parameters.
$(deriveJSON (unPrefix "_software_") ''Software)
$(deriveJSON (unPrefix "_phyloParam_") ''PhyloParam)

-- Sum types: aeson's default options.
$(deriveJSON defaultOptions ''Filter)
$(deriveJSON defaultOptions ''Metric)
$(deriveJSON defaultOptions ''Cluster)
$(deriveJSON defaultOptions ''Proximity)

-- Parameter records of the clustering and proximity methods.
$(deriveJSON (unPrefix "_fis_") ''FisParams)
$(deriveJSON (unPrefix "_hamming_") ''HammingParams)
$(deriveJSON (unPrefix "_louvain_") ''LouvainParams)
$(deriveJSON (unPrefix "_rc_") ''RCParams)
$(deriveJSON (unPrefix "_wlj_") ''WLJParams)

-- Parameter records of the filters.
$(deriveJSON (unPrefix "_lb_") ''LBParams)
$(deriveJSON (unPrefix "_sb_") ''SBParams)

-- Build query and output view.
$(deriveJSON (unPrefix "_q_") ''PhyloQueryBuild)
$(deriveJSON (unPrefix "_pv_") ''PhyloView)
$(deriveJSON (unPrefix "_pb_") ''PhyloBranch)
$(deriveJSON (unPrefix "_pe_") ''PhyloEdge)
$(deriveJSON (unPrefix "_pn_") ''PhyloNode)

$(deriveJSON defaultOptions ''Filiation)
$(deriveJSON defaultOptions ''EdgeType)
521 ----------------------------
522 -- | TODO XML instances | --
523 ----------------------------