2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
47 import Control.DeepSeq
54 -- | Global parameters of a Phylo
56 PhyloParam { _phyloParam_version :: Text -- Double ?
57 , _phyloParam_software :: Software
58 , _phyloParam_query :: PhyloQueryBuild
59 } deriving (Generic, Show, Eq)
62 -- | Software parameters
64 Software { _software_name :: Text
65 , _software_version :: Text
66 } deriving (Generic, Show, Eq)
74 -- | Phylo datatype of a phylomemy
75 -- Duration : time Segment of the whole Phylo
76 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
77 -- Periods : list of all the periods of a Phylo
79 Phylo { _phylo_duration :: (Start, End)
80 , _phylo_foundations :: PhyloFoundations
81 , _phylo_periods :: [PhyloPeriod]
82 , _phylo_docsByYears :: Map Date Double
83 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
84 , _phylo_fis :: Map (Date,Date) [PhyloFis]
85 , _phylo_param :: PhyloParam
87 deriving (Generic, Show, Eq)
90 -- | The foundations of a phylomemy created from a given TermList
91 data PhyloFoundations =
92 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
93 , _phylo_foundationsTermsList :: TermList
94 } deriving (Generic, Show, Eq)
97 -- | Date : a simple Integer
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
112 -- | PhyloStep : steps of phylomemy on temporal axis
113 -- Period: tuple (start date, end date) of the step of the phylomemy
114 -- Levels: levels of granularity
116 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
117 , _phylo_periodLevels :: [PhyloLevel]
119 deriving (Generic, Show, Eq)
127 -- | PhyloLevel : levels of phylomemy on level axis
128 -- Levels description:
129 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
130 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
131 -- Level 1: First level of clustering
132 -- Level N: Nth level of clustering
134 PhyloLevel { _phylo_levelId :: PhyloLevelId
135 , _phylo_levelGroups :: [PhyloGroup]
137 deriving (Generic, Show, Eq)
145 -- | PhyloGroup : group of ngrams at each level and step
146 -- Label : maybe has a label as text
147 -- Ngrams: set of terms that build the group
148 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
149 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
150 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
151 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
153 PhyloGroup { _phylo_groupId :: PhyloGroupId
154 , _phylo_groupLabel :: Text
155 , _phylo_groupNgrams :: [Int]
156 , _phylo_groupMeta :: Map Text Double
157 , _phylo_groupBranchId :: Maybe PhyloBranchId
158 , _phylo_groupCooc :: Map (Int,Int) Double
160 , _phylo_groupPeriodParents :: [Pointer]
161 , _phylo_groupPeriodChilds :: [Pointer]
163 , _phylo_groupLevelParents :: [Pointer]
164 , _phylo_groupLevelChilds :: [Pointer]
166 deriving (Generic, NFData, Show, Eq, Ord)
168 -- instance NFData PhyloGroup
171 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
173 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
177 type PhyloPeriodId = (Start, End)
178 type PhyloLevelId = (PhyloPeriodId, Level)
179 type PhyloGroupId = (PhyloLevelId, Index)
180 type PhyloBranchId = (Level, Index)
183 -- | Weight : A generic mesure that can be associated with an Id
185 -- | Pointer : A weighted linked with a given PhyloGroup
186 type Pointer = (PhyloGroupId, Weight)
187 -- | Ngrams : a contiguous sequence of n terms
196 -- | Document : a piece of Text linked to a Date
197 data Document = Document
200 } deriving (Show,Generic)
202 -- | Clique : Set of ngrams cooccurring in the same Document
203 type Clique = Set Ngrams
204 -- | Support : Number of Documents where a Clique occurs
206 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
207 data PhyloFis = PhyloFis
208 { _phyloFis_clique :: Clique
209 , _phyloFis_support :: Support
210 , _phyloFis_period :: (Date,Date)
211 } deriving (Generic,Show,Eq)
213 -- | A list of clustered PhyloGroup
214 type PhyloCluster = [PhyloGroup]
217 -- | A PhyloGroup in a Graph
218 type GroupNode = PhyloGroup
219 -- | A weighted links between two PhyloGroups in a Graph
220 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
221 -- | The association as a Graph between a list of Nodes and a list of Edges
222 type GroupGraph = ([GroupNode],[GroupEdge])
230 data PhyloError = LevelDoesNotExist
240 -- | Cluster constructors
241 data Cluster = Fis FisParams
242 | RelatedComponents RCParams
243 | Louvain LouvainParams
244 deriving (Generic, Show, Eq, Read)
246 -- | Parameters for Fis clustering
247 data FisParams = FisParams
248 { _fis_keepMinorFis :: Bool
249 , _fis_minSupport :: Support
250 , _fis_minSize :: Int
251 } deriving (Generic, Show, Eq, Read)
253 -- | Parameters for RelatedComponents clustering
254 data RCParams = RCParams
255 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
257 -- | Parameters for Louvain clustering
258 data LouvainParams = LouvainParams
259 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
267 -- | Proximity constructors
268 data Proximity = WeightedLogJaccard WLJParams
269 | Hamming HammingParams
271 deriving (Generic, Show, Eq, Read)
273 -- | Parameters for WeightedLogJaccard proximity
274 data WLJParams = WLJParams
275 { _wlj_threshold :: Double
276 , _wlj_sensibility :: Double
277 } deriving (Generic, Show, Eq, Read)
279 -- | Parameters for Hamming proximity
280 data HammingParams = HammingParams
281 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
289 -- | Filter constructors
290 data Filter = LonelyBranch LBParams
291 | SizeBranch SBParams
292 deriving (Generic, Show, Eq)
294 -- | Parameters for LonelyBranch filter
295 data LBParams = LBParams
296 { _lb_periodsInf :: Int
297 , _lb_periodsSup :: Int
298 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
300 -- | Parameters for SizeBranch filter
301 data SBParams = SBParams
302 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
310 -- | Metric constructors
311 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
319 -- | Tagger constructors
320 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
328 -- | Sort constructors
329 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
330 data Order = Asc | Desc deriving (Generic, Show, Read)
338 -- | A Phyloquery describes a phylomemic reconstruction
339 data PhyloQueryBuild = PhyloQueryBuild
340 { _q_phyloTitle :: Text
341 , _q_phyloDesc :: Text
343 -- Grain and Steps for the PhyloPeriods
344 , _q_periodGrain :: Int
345 , _q_periodSteps :: Int
347 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
348 , _q_contextualUnit :: Cluster
349 , _q_contextualUnitMetrics :: [Metric]
350 , _q_contextualUnitFilters :: [Filter]
352 -- Inter-temporal matching method of the Phylo
353 , _q_interTemporalMatching :: Proximity
354 , _q_interTemporalMatchingFrame :: Int
356 , _q_reBranchThr :: Double
357 , _q_reBranchNth :: Int
359 -- Last level of reconstruction
360 , _q_nthLevel :: Level
361 -- Clustering method used from level 1 to nthLevel
362 , _q_nthCluster :: Cluster
363 } deriving (Generic, Show, Eq)
365 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
366 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
367 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
374 -- | A PhyloView is the output type of a Phylo
375 data PhyloView = PhyloView
376 { _pv_param :: PhyloParam
378 , _pv_description :: Text
379 , _pv_filiation :: Filiation
381 , _pv_periods :: [PhyloPeriodId]
382 , _pv_metrics :: Map Text [Double]
383 , _pv_branches :: [PhyloBranch]
384 , _pv_nodes :: [PhyloNode]
385 , _pv_edges :: [PhyloEdge]
386 } deriving (Generic, Show)
388 -- | A phyloview is made of PhyloBranches, edges and nodes
389 data PhyloBranch = PhyloBranch
390 { _pb_id :: PhyloBranchId
392 , _pb_metrics :: Map Text [Double]
393 } deriving (Generic, Show)
395 data PhyloEdge = PhyloEdge
396 { _pe_source :: PhyloGroupId
397 , _pe_target :: PhyloGroupId
398 , _pe_type :: EdgeType
399 , _pe_weight :: Weight
400 } deriving (Generic, Show)
402 data PhyloNode = PhyloNode
403 { _pn_id :: PhyloGroupId
404 , _pn_bid :: Maybe PhyloBranchId
407 , _pn_ngrams :: Maybe [Ngrams]
408 , _pn_metrics :: Map Text [Double]
409 , _pn_parents :: Maybe [PhyloGroupId]
410 , _pn_childs :: [PhyloNode]
411 } deriving (Generic, Show)
413 ------------------------
414 -- | PhyloQueryView | --
415 ------------------------
418 data ExportMode = Json | Dot | Svg
419 deriving (Generic, Show, Read)
420 data DisplayMode = Flat | Nested
421 deriving (Generic, Show, Read)
423 -- | A PhyloQueryView describes a Phylo as an output view
424 data PhyloQueryView = PhyloQueryView
427 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
428 , _qv_filiation :: Filiation
430 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
431 , _qv_levelChilds :: Bool
432 , _qv_levelChildsDepth :: Level
434 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
435 -- Firstly the metrics, then the filters and the taggers
436 , _qv_metrics :: [Metric]
437 , _qv_filters :: [Filter]
438 , _qv_taggers :: [Tagger]
440 -- An asc or desc sort to apply to the PhyloGraph
441 , _qv_sort :: Maybe (Sort,Order)
443 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
444 , _qv_export :: ExportMode
445 , _qv_display :: DisplayMode
446 , _qv_verbose :: Bool
455 makeLenses ''PhyloParam
456 makeLenses ''Software
459 makeLenses ''PhyloFoundations
460 makeLenses ''PhyloGroup
461 makeLenses ''PhyloLevel
462 makeLenses ''PhyloPeriod
463 makeLenses ''PhyloFis
465 makeLenses ''Proximity
469 makeLenses ''PhyloQueryBuild
470 makeLenses ''PhyloQueryView
472 makeLenses ''PhyloView
473 makeLenses ''PhyloBranch
474 makeLenses ''PhyloNode
475 makeLenses ''PhyloEdge
478 ------------------------
479 -- | JSON instances | --
480 ------------------------
483 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
484 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
485 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
486 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
487 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
488 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
490 $(deriveJSON (unPrefix "_software_" ) ''Software )
491 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
493 $(deriveJSON defaultOptions ''Filter )
494 $(deriveJSON defaultOptions ''Metric )
495 $(deriveJSON defaultOptions ''Cluster )
496 $(deriveJSON defaultOptions ''Proximity )
498 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
499 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
500 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
501 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
502 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
504 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
505 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
507 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
508 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
509 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
510 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
511 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
513 $(deriveJSON defaultOptions ''Filiation )
514 $(deriveJSON defaultOptions ''EdgeType )
517 ----------------------------
518 -- | TODO XML instances | --
519 ----------------------------