2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
52 -- | Global parameters of a Phylo
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
60 -- | Software parameters
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
82 deriving (Generic, Show, Eq)
85 -- | The foundations of a phylomemy created from a given TermList
86 data PhyloFoundations =
87 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
88 , _phylo_foundationsTermsList :: TermList
89 } deriving (Generic, Show, Eq)
92 -- | Date : a simple Integer
95 -- | UTCTime in seconds since UNIX epoch
96 -- type Start = POSIXTime
97 -- type End = POSIXTime
102 ---------------------
103 -- | PhyloPeriod | --
104 ---------------------
107 -- | PhyloStep : steps of phylomemy on temporal axis
108 -- Period: tuple (start date, end date) of the step of the phylomemy
109 -- Levels: levels of granularity
111 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
112 , _phylo_periodLevels :: [PhyloLevel]
114 deriving (Generic, Show, Eq)
122 -- | PhyloLevel : levels of phylomemy on level axis
123 -- Levels description:
124 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
125 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
126 -- Level 1: First level of clustering
127 -- Level N: Nth level of clustering
129 PhyloLevel { _phylo_levelId :: PhyloLevelId
130 , _phylo_levelGroups :: [PhyloGroup]
132 deriving (Generic, Show, Eq)
140 -- | PhyloGroup : group of ngrams at each level and step
141 -- Label : maybe has a label as text
142 -- Ngrams: set of terms that build the group
143 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
144 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
145 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
146 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
148 PhyloGroup { _phylo_groupId :: PhyloGroupId
149 , _phylo_groupLabel :: Text
150 , _phylo_groupNgrams :: [Int]
151 , _phylo_groupMeta :: Map Text Double
152 , _phylo_groupCooc :: Map (Int, Int) Double
153 , _phylo_groupBranchId :: Maybe PhyloBranchId
155 , _phylo_groupPeriodParents :: [Pointer]
156 , _phylo_groupPeriodChilds :: [Pointer]
158 , _phylo_groupLevelParents :: [Pointer]
159 , _phylo_groupLevelChilds :: [Pointer]
161 deriving (Generic, Show, Eq, Ord)
164 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
166 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
170 type PhyloPeriodId = (Start, End)
171 type PhyloLevelId = (PhyloPeriodId, Level)
172 type PhyloGroupId = (PhyloLevelId, Index)
173 type PhyloBranchId = (Level, Index)
176 -- | Weight : A generic mesure that can be associated with an Id
178 -- | Pointer : A weighted linked with a given PhyloGroup
179 type Pointer = (PhyloGroupId, Weight)
180 -- | Ngrams : a contiguous sequence of n terms
189 -- | Document : a piece of Text linked to a Date
190 data Document = Document
193 } deriving (Show,Generic)
195 -- | Clique : Set of ngrams cooccurring in the same Document
196 type Clique = Set Ngrams
197 -- | Support : Number of Documents where a Clique occurs
199 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
200 data PhyloFis = PhyloFis
201 { _phyloFis_clique :: Clique
202 , _phyloFis_support :: Support
203 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
206 -- | A list of clustered PhyloGroup
207 type PhyloCluster = [PhyloGroup]
210 -- | A List of PhyloGroup in a Graph
211 type GroupNodes = [PhyloGroup]
212 -- | A List of weighted links between some PhyloGroups in a Graph
213 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
214 -- | The association as a Graph between a list of Nodes and a list of Edges
215 type GroupGraph = (GroupNodes,GroupEdges)
223 data PhyloError = LevelDoesNotExist
233 -- | Cluster constructors
234 data Cluster = Fis FisParams
235 | RelatedComponents RCParams
236 | Louvain LouvainParams
237 deriving (Generic, Show, Eq, Read)
239 -- | Parameters for Fis clustering
240 data FisParams = FisParams
241 { _fis_keepMinorFis :: Bool
242 , _fis_minSupport :: Support
243 , _fis_minSize :: Int
244 } deriving (Generic, Show, Eq, Read)
246 -- | Parameters for RelatedComponents clustering
247 data RCParams = RCParams
248 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
250 -- | Parameters for Louvain clustering
251 data LouvainParams = LouvainParams
252 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
260 -- | Proximity constructors
261 data Proximity = WeightedLogJaccard WLJParams
262 | Hamming HammingParams
264 deriving (Generic, Show, Eq, Read)
266 -- | Parameters for WeightedLogJaccard proximity
267 data WLJParams = WLJParams
268 { _wlj_threshold :: Double
269 , _wlj_sensibility :: Double
270 } deriving (Generic, Show, Eq, Read)
272 -- | Parameters for Hamming proximity
273 data HammingParams = HammingParams
274 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
282 -- | Filter constructors
283 data Filter = SmallBranch SBParams deriving (Generic, Show, Eq)
285 -- | Parameters for SmallBranch filter
286 data SBParams = SBParams
287 { _sb_periodsInf :: Int
288 , _sb_periodsSup :: Int
289 , _sb_minNodes :: Int } deriving (Generic, Show, Eq)
297 -- | Metric constructors
298 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
306 -- | Tagger constructors
307 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
315 -- | Sort constructors
316 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
317 data Order = Asc | Desc deriving (Generic, Show, Read)
325 -- | A Phyloquery describes a phylomemic reconstruction
326 data PhyloQueryBuild = PhyloQueryBuild
327 { _q_phyloTitle :: Text
328 , _q_phyloDesc :: Text
330 -- Grain and Steps for the PhyloPeriods
331 , _q_periodGrain :: Int
332 , _q_periodSteps :: Int
334 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
335 , _q_contextualUnit :: Cluster
336 , _q_contextualUnitMetrics :: [Metric]
337 , _q_contextualUnitFilters :: [Filter]
339 -- Inter-temporal matching method of the Phylo
340 , _q_interTemporalMatching :: Proximity
342 -- Last level of reconstruction
343 , _q_nthLevel :: Level
344 -- Clustering method used from level 1 to nthLevel
345 , _q_nthCluster :: Cluster
346 } deriving (Generic, Show, Eq)
348 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
349 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
350 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
357 -- | A PhyloView is the output type of a Phylo
358 data PhyloView = PhyloView
359 { _pv_param :: PhyloParam
361 , _pv_description :: Text
362 , _pv_filiation :: Filiation
364 , _pv_periods :: [PhyloPeriodId]
365 , _pv_metrics :: Map Text [Double]
366 , _pv_branches :: [PhyloBranch]
367 , _pv_nodes :: [PhyloNode]
368 , _pv_edges :: [PhyloEdge]
369 } deriving (Generic, Show)
371 -- | A phyloview is made of PhyloBranches, edges and nodes
372 data PhyloBranch = PhyloBranch
373 { _pb_id :: PhyloBranchId
375 , _pb_metrics :: Map Text [Double]
376 } deriving (Generic, Show)
378 data PhyloEdge = PhyloEdge
379 { _pe_source :: PhyloGroupId
380 , _pe_target :: PhyloGroupId
381 , _pe_type :: EdgeType
382 , _pe_weight :: Weight
383 } deriving (Generic, Show)
385 data PhyloNode = PhyloNode
386 { _pn_id :: PhyloGroupId
387 , _pn_bid :: Maybe PhyloBranchId
390 , _pn_ngrams :: Maybe [Ngrams]
391 , _pn_metrics :: Map Text [Double]
392 , _pn_parents :: Maybe [PhyloGroupId]
393 , _pn_childs :: [PhyloNode]
394 } deriving (Generic, Show)
396 ------------------------
397 -- | PhyloQueryView | --
398 ------------------------
401 data ExportMode = Json | Dot | Svg
402 deriving (Generic, Show, Read)
403 data DisplayMode = Flat | Nested
404 deriving (Generic, Show, Read)
406 -- | A PhyloQueryView describes a Phylo as an output view
407 data PhyloQueryView = PhyloQueryView
410 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
411 , _qv_filiation :: Filiation
413 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
414 , _qv_levelChilds :: Bool
415 , _qv_levelChildsDepth :: Level
417 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
418 -- Firstly the metrics, then the filters and the taggers
419 , _qv_metrics :: [Metric]
420 , _qv_filters :: [Filter]
421 , _qv_taggers :: [Tagger]
423 -- An asc or desc sort to apply to the PhyloGraph
424 , _qv_sort :: Maybe (Sort,Order)
426 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
427 , _qv_export :: ExportMode
428 , _qv_display :: DisplayMode
429 , _qv_verbose :: Bool
438 makeLenses ''PhyloParam
439 makeLenses ''Software
442 makeLenses ''PhyloFoundations
443 makeLenses ''PhyloGroup
444 makeLenses ''PhyloLevel
445 makeLenses ''PhyloPeriod
446 makeLenses ''PhyloFis
448 makeLenses ''Proximity
452 makeLenses ''PhyloQueryBuild
453 makeLenses ''PhyloQueryView
455 makeLenses ''PhyloView
456 makeLenses ''PhyloBranch
457 makeLenses ''PhyloNode
458 makeLenses ''PhyloEdge
461 ------------------------
462 -- | JSON instances | --
463 ------------------------
466 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
467 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
468 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
469 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
470 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
471 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
473 $(deriveJSON (unPrefix "_software_" ) ''Software )
474 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
476 $(deriveJSON defaultOptions ''Filter )
477 $(deriveJSON defaultOptions ''Metric )
478 $(deriveJSON defaultOptions ''Cluster )
479 $(deriveJSON defaultOptions ''Proximity )
481 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
482 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
483 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
484 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
485 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
486 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
488 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
489 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
490 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
491 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
492 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
494 $(deriveJSON defaultOptions ''Filiation )
495 $(deriveJSON defaultOptions ''EdgeType )
498 ----------------------------
499 -- | TODO XML instances | --
500 ----------------------------