2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 --import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 --import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
50 -- | Global parameters of a Phylo
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_query :: PhyloQueryBuild
55 } deriving (Generic, Show, Eq)
58 -- | Software parameters
60 Software { _software_name :: Text
61 , _software_version :: Text
62 } deriving (Generic, Show, Eq)
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: Vector Ngrams
77 , _phylo_foundationsPeaks :: PhyloPeaks
78 , _phylo_periods :: [PhyloPeriod]
79 , _phylo_param :: PhyloParam
81 deriving (Generic, Show, Eq)
83 -- | The PhyloPeaks describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
84 -- PeaksLabels are the root labels of each Ngrams trees
86 PhyloPeaks { _phylo_peaksLabels :: Vector Ngrams
87 , _phylo_peaksForest :: [Tree Ngrams]
89 deriving (Generic, Show, Eq)
91 -- | A Tree of Ngrams where each node is a label
92 data Tree a = Empty | Node a [Tree a] deriving (Show, Eq)
95 -- | Date : a simple Integer
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
117 deriving (Generic, Show, Eq)
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
135 deriving (Generic, Show, Eq)
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupCooc :: Map (Int, Int) Double
156 , _phylo_groupBranchId :: Maybe PhyloBranchId
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
164 deriving (Generic, Show, Eq, Ord)
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
179 -- | Weight : A generic mesure that can be associated with an Id
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
213 -- | A List of PhyloGroup in a Graph
214 type GroupNodes = [PhyloGroup]
215 -- | A List of weighted links between some PhyloGroups in a Graph
216 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = (GroupNodes,GroupEdges)
226 data PhyloError = LevelDoesNotExist
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq)
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 } deriving (Generic, Show, Eq)
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq)
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq)
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
266 deriving (Generic, Show, Eq)
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq)
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq)
284 -- | Filter constructors
285 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
287 -- | Parameters for SmallBranch filter
288 data SBParams = SBParams
289 { _sb_periodsInf :: Int
290 , _sb_periodsSup :: Int
291 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
299 -- | Metric constructors
300 data Metric = BranchAge deriving (Generic, Show, Eq)
308 -- | Tagger constructors
309 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
317 -- | Sort constructors
318 data Sort = ByBranchAge deriving (Generic, Show)
319 data Order = Asc | Desc deriving (Generic, Show)
327 -- | A Phyloquery describes a phylomemic reconstruction
328 data PhyloQueryBuild = PhyloQueryBuild
329 { _q_phyloTitle :: Text
330 , _q_phyloDesc :: Text
332 -- Grain and Steps for the PhyloPeriods
333 , _q_periodGrain :: Int
334 , _q_periodSteps :: Int
336 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
337 , _q_contextualUnit :: Cluster
338 , _q_contextualUnitMetrics :: [Metric]
339 , _q_contextualUnitFilters :: [Filter]
341 -- Inter-temporal matching method of the Phylo
342 , _q_interTemporalMatching :: Proximity
344 -- Last level of reconstruction
345 , _q_nthLevel :: Level
346 -- Clustering method used from level 1 to nthLevel
347 , _q_nthCluster :: Cluster
348 } deriving (Generic, Show, Eq)
350 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
351 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show)
352 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
359 -- | A PhyloView is the output type of a Phylo
360 data PhyloView = PhyloView
361 { _pv_param :: PhyloParam
363 , _pv_description :: Text
364 , _pv_filiation :: Filiation
366 , _pv_metrics :: Map Text [Double]
367 , _pv_branches :: [PhyloBranch]
368 , _pv_nodes :: [PhyloNode]
369 , _pv_edges :: [PhyloEdge]
370 } deriving (Generic, Show)
372 -- | A phyloview is made of PhyloBranches, edges and nodes
373 data PhyloBranch = PhyloBranch
374 { _pb_id :: PhyloBranchId
376 , _pb_metrics :: Map Text [Double]
377 } deriving (Generic, Show)
379 data PhyloEdge = PhyloEdge
380 { _pe_source :: PhyloGroupId
381 , _pe_target :: PhyloGroupId
382 , _pe_type :: EdgeType
383 , _pe_weight :: Weight
384 } deriving (Generic, Show)
386 data PhyloNode = PhyloNode
387 { _pn_id :: PhyloGroupId
388 , _pn_bid :: Maybe PhyloBranchId
391 , _pn_ngrams :: Maybe [Ngrams]
392 , _pn_metrics :: Map Text [Double]
393 , _pn_parents :: Maybe [PhyloGroupId]
394 , _pn_childs :: [PhyloNode]
395 } deriving (Generic, Show)
397 ------------------------
398 -- | PhyloQueryView | --
399 ------------------------
402 data ExportMode = Json | Dot | Svg
403 data DisplayMode = Flat | Nested
405 -- | A PhyloQueryView describes a Phylo as an output view
406 data PhyloQueryView = PhyloQueryView
409 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
410 , _qv_filiation :: Filiation
412 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
413 , _qv_levelChilds :: Bool
414 , _qv_levelChildsDepth :: Level
416 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
417 -- Firstly the metrics, then the filters and the taggers
418 , _qv_metrics :: [Metric]
419 , _qv_filters :: [Filter]
420 , _qv_taggers :: [Tagger]
422 -- An asc or desc sort to apply to the PhyloGraph
423 , _qv_sort :: Maybe (Sort,Order)
425 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
426 , _qv_export :: ExportMode
427 , _qv_display :: DisplayMode
428 , _qv_verbose :: Bool
437 makeLenses ''PhyloParam
438 makeLenses ''Software
441 makeLenses ''PhyloPeaks
442 makeLenses ''PhyloGroup
443 makeLenses ''PhyloLevel
444 makeLenses ''PhyloPeriod
445 makeLenses ''PhyloFis
447 makeLenses ''Proximity
451 makeLenses ''PhyloQueryBuild
452 makeLenses ''PhyloQueryView
454 makeLenses ''PhyloView
455 makeLenses ''PhyloBranch
456 makeLenses ''PhyloNode
457 makeLenses ''PhyloEdge
460 ------------------------
461 -- | JSON instances | --
462 ------------------------
465 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
466 $(deriveJSON (unPrefix "_phylo_peaks" ) ''PhyloPeaks )
467 $(deriveJSON defaultOptions ''Tree )
468 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
469 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
470 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
471 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
473 $(deriveJSON (unPrefix "_software_" ) ''Software )
474 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
476 $(deriveJSON defaultOptions ''Filter )
477 $(deriveJSON defaultOptions ''Metric )
478 $(deriveJSON defaultOptions ''Cluster )
479 $(deriveJSON defaultOptions ''Proximity )
481 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
482 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
483 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
484 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
485 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
486 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
488 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
489 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
490 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
491 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
492 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
494 $(deriveJSON defaultOptions ''Filiation )
495 $(deriveJSON defaultOptions ''EdgeType )
498 ----------------------------
499 -- | TODO XML instances | --
500 ----------------------------