2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
47 import Control.DeepSeq
54 -- | Global parameters of a Phylo
56 PhyloParam { _phyloParam_version :: Text -- Double ?
57 , _phyloParam_software :: Software
58 , _phyloParam_query :: PhyloQueryBuild
59 } deriving (Generic, Show, Eq)
62 -- | Software parameters
64 Software { _software_name :: Text
65 , _software_version :: Text
66 } deriving (Generic, Show, Eq)
74 -- | Phylo datatype of a phylomemy
75 -- Duration : time Segment of the whole Phylo
76 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
77 -- Periods : list of all the periods of a Phylo
79 Phylo { _phylo_duration :: (Start, End)
80 , _phylo_foundations :: PhyloFoundations
81 , _phylo_periods :: [PhyloPeriod]
82 , _phylo_docsByYears :: Map Date Double
83 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
84 , _phylo_fis :: Map (Date,Date) [PhyloFis]
85 , _phylo_param :: PhyloParam
87 deriving (Generic, Show, Eq)
90 -- | The foundations of a phylomemy created from a given TermList
91 data PhyloFoundations =
92 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
93 , _phylo_foundationsTermsList :: TermList
94 } deriving (Generic, Show, Eq)
97 -- | Date : a simple Integer
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
107 ---------------------
108 -- | PhyloPeriod | --
109 ---------------------
112 -- | PhyloStep : steps of phylomemy on temporal axis
113 -- Period: tuple (start date, end date) of the step of the phylomemy
114 -- Levels: levels of granularity
116 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
117 , _phylo_periodLevels :: [PhyloLevel]
119 deriving (Generic, Show, Eq)
127 -- | PhyloLevel : levels of phylomemy on level axis
128 -- Levels description:
129 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
130 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
131 -- Level 1: First level of clustering
132 -- Level N: Nth level of clustering
134 PhyloLevel { _phylo_levelId :: PhyloLevelId
135 , _phylo_levelGroups :: [PhyloGroup]
137 deriving (Generic, Show, Eq)
145 -- | PhyloGroup : group of ngrams at each level and step
146 -- Label : maybe has a label as text
147 -- Ngrams: set of terms that build the group
148 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
149 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
150 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
151 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
153 PhyloGroup { _phylo_groupId :: PhyloGroupId
154 , _phylo_groupLabel :: Text
155 , _phylo_groupNgrams :: [Int]
156 , _phylo_groupNgramsMeta :: Map Text [Double]
157 , _phylo_groupMeta :: Map Text Double
158 , _phylo_groupBranchId :: Maybe PhyloBranchId
159 , _phylo_groupCooc :: Map (Int,Int) Double
161 , _phylo_groupPeriodParents :: [Pointer]
162 , _phylo_groupPeriodChilds :: [Pointer]
164 , _phylo_groupLevelParents :: [Pointer]
165 , _phylo_groupLevelChilds :: [Pointer]
167 deriving (Generic, NFData, Show, Eq, Ord)
169 -- instance NFData PhyloGroup
172 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
174 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
178 type PhyloPeriodId = (Start, End)
179 type PhyloLevelId = (PhyloPeriodId, Level)
180 type PhyloGroupId = (PhyloLevelId, Index)
181 type PhyloBranchId = (Level, Index)
184 -- | Weight : A generic mesure that can be associated with an Id
186 -- | Pointer : A weighted linked with a given PhyloGroup
187 type Pointer = (PhyloGroupId, Weight)
188 -- | Ngrams : a contiguous sequence of n terms
197 -- | Document : a piece of Text linked to a Date
198 data Document = Document
201 } deriving (Show,Generic)
203 -- | Clique : Set of ngrams cooccurring in the same Document
204 type Clique = Set Ngrams
205 -- | Support : Number of Documents where a Clique occurs
207 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
208 data PhyloFis = PhyloFis
209 { _phyloFis_clique :: Clique
210 , _phyloFis_support :: Support
211 , _phyloFis_period :: (Date,Date)
212 } deriving (Generic,Show,Eq)
214 -- | A list of clustered PhyloGroup
215 type PhyloCluster = [PhyloGroup]
218 -- | A PhyloGroup in a Graph
219 type GroupNode = PhyloGroup
220 -- | A weighted links between two PhyloGroups in a Graph
221 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
222 -- | The association as a Graph between a list of Nodes and a list of Edges
223 type GroupGraph = ([GroupNode],[GroupEdge])
231 data PhyloError = LevelDoesNotExist
241 -- | Cluster constructors
242 data Cluster = Fis FisParams
243 | RelatedComponents RCParams
244 | Louvain LouvainParams
245 deriving (Generic, Show, Eq, Read)
247 -- | Parameters for Fis clustering
248 data FisParams = FisParams
249 { _fis_keepMinorFis :: Bool
250 , _fis_minSupport :: Support
251 , _fis_minSize :: Int
252 } deriving (Generic, Show, Eq, Read)
254 -- | Parameters for RelatedComponents clustering
255 data RCParams = RCParams
256 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
258 -- | Parameters for Louvain clustering
259 data LouvainParams = LouvainParams
260 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
268 -- | Proximity constructors
269 data Proximity = WeightedLogJaccard WLJParams
270 | Hamming HammingParams
272 deriving (Generic, Show, Eq, Read)
274 -- | Parameters for WeightedLogJaccard proximity
275 data WLJParams = WLJParams
276 { _wlj_threshold :: Double
277 , _wlj_sensibility :: Double
278 } deriving (Generic, Show, Eq, Read)
280 -- | Parameters for Hamming proximity
281 data HammingParams = HammingParams
282 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
290 -- | Filter constructors
291 data Filter = LonelyBranch LBParams
292 | SizeBranch SBParams
293 deriving (Generic, Show, Eq)
295 -- | Parameters for LonelyBranch filter
296 data LBParams = LBParams
297 { _lb_periodsInf :: Int
298 , _lb_periodsSup :: Int
299 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
301 -- | Parameters for SizeBranch filter
302 data SBParams = SBParams
303 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
311 -- | Metric constructors
312 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
320 -- | Tagger constructors
321 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
329 -- | Sort constructors
330 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
331 data Order = Asc | Desc deriving (Generic, Show, Read)
339 -- | A Phyloquery describes a phylomemic reconstruction
340 data PhyloQueryBuild = PhyloQueryBuild
341 { _q_phyloTitle :: Text
342 , _q_phyloDesc :: Text
344 -- Grain and Steps for the PhyloPeriods
345 , _q_periodGrain :: Int
346 , _q_periodSteps :: Int
348 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
349 , _q_contextualUnit :: Cluster
350 , _q_contextualUnitMetrics :: [Metric]
351 , _q_contextualUnitFilters :: [Filter]
353 -- Inter-temporal matching method of the Phylo
354 , _q_interTemporalMatching :: Proximity
355 , _q_interTemporalMatchingFrame :: Int
356 , _q_interTemporalMatchingFrameTh :: Double
358 , _q_reBranchThr :: Double
359 , _q_reBranchNth :: Int
361 -- Last level of reconstruction
362 , _q_nthLevel :: Level
363 -- Clustering method used from level 1 to nthLevel
364 , _q_nthCluster :: Cluster
365 } deriving (Generic, Show, Eq)
367 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
368 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
369 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
376 -- | A PhyloView is the output type of a Phylo
377 data PhyloView = PhyloView
378 { _pv_param :: PhyloParam
380 , _pv_description :: Text
381 , _pv_filiation :: Filiation
383 , _pv_periods :: [PhyloPeriodId]
384 , _pv_metrics :: Map Text [Double]
385 , _pv_branches :: [PhyloBranch]
386 , _pv_nodes :: [PhyloNode]
387 , _pv_edges :: [PhyloEdge]
388 } deriving (Generic, Show)
390 -- | A phyloview is made of PhyloBranches, edges and nodes
391 data PhyloBranch = PhyloBranch
392 { _pb_id :: PhyloBranchId
394 , _pb_metrics :: Map Text [Double]
395 } deriving (Generic, Show)
397 data PhyloEdge = PhyloEdge
398 { _pe_source :: PhyloGroupId
399 , _pe_target :: PhyloGroupId
400 , _pe_type :: EdgeType
401 , _pe_weight :: Weight
402 } deriving (Generic, Show)
404 data PhyloNode = PhyloNode
405 { _pn_id :: PhyloGroupId
406 , _pn_bid :: Maybe PhyloBranchId
409 , _pn_ngrams :: Maybe [Ngrams]
410 , _pn_metrics :: Map Text [Double]
411 , _pn_parents :: Maybe [PhyloGroupId]
412 , _pn_childs :: [PhyloNode]
413 } deriving (Generic, Show)
415 ------------------------
416 -- | PhyloQueryView | --
417 ------------------------
420 data ExportMode = Json | Dot | Svg
421 deriving (Generic, Show, Read)
422 data DisplayMode = Flat | Nested
423 deriving (Generic, Show, Read)
425 -- | A PhyloQueryView describes a Phylo as an output view
426 data PhyloQueryView = PhyloQueryView
429 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
430 , _qv_filiation :: Filiation
432 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
433 , _qv_levelChilds :: Bool
434 , _qv_levelChildsDepth :: Level
436 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
437 -- Firstly the metrics, then the filters and the taggers
438 , _qv_metrics :: [Metric]
439 , _qv_filters :: [Filter]
440 , _qv_taggers :: [Tagger]
442 -- An asc or desc sort to apply to the PhyloGraph
443 , _qv_sort :: Maybe (Sort,Order)
445 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
446 , _qv_export :: ExportMode
447 , _qv_display :: DisplayMode
448 , _qv_verbose :: Bool
457 makeLenses ''PhyloParam
458 makeLenses ''Software
461 makeLenses ''PhyloFoundations
462 makeLenses ''PhyloGroup
463 makeLenses ''PhyloLevel
464 makeLenses ''PhyloPeriod
465 makeLenses ''PhyloFis
467 makeLenses ''Proximity
471 makeLenses ''PhyloQueryBuild
472 makeLenses ''PhyloQueryView
474 makeLenses ''PhyloView
475 makeLenses ''PhyloBranch
476 makeLenses ''PhyloNode
477 makeLenses ''PhyloEdge
480 ------------------------
481 -- | JSON instances | --
482 ------------------------
485 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
486 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
487 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
488 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
489 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
490 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
492 $(deriveJSON (unPrefix "_software_" ) ''Software )
493 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
495 $(deriveJSON defaultOptions ''Filter )
496 $(deriveJSON defaultOptions ''Metric )
497 $(deriveJSON defaultOptions ''Cluster )
498 $(deriveJSON defaultOptions ''Proximity )
500 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
501 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
502 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
503 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
504 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
506 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
507 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
509 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
510 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
511 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
512 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
513 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
515 $(deriveJSON defaultOptions ''Filiation )
516 $(deriveJSON defaultOptions ''EdgeType )
519 ----------------------------
520 -- | TODO XML instances | --
521 ----------------------------