2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
52 -- | Global parameters of a Phylo
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
60 -- | Software parameters
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
82 , _phylo_param :: PhyloParam
84 deriving (Generic, Show, Eq)
87 -- | The foundations of a phylomemy created from a given TermList
88 data PhyloFoundations =
89 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
90 , _phylo_foundationsTermsList :: TermList
91 } deriving (Generic, Show, Eq)
94 -- | Date : a simple Integer
97 -- | UTCTime in seconds since UNIX epoch
98 -- type Start = POSIXTime
99 -- type End = POSIXTime
104 ---------------------
105 -- | PhyloPeriod | --
106 ---------------------
109 -- | PhyloStep : steps of phylomemy on temporal axis
110 -- Period: tuple (start date, end date) of the step of the phylomemy
111 -- Levels: levels of granularity
113 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
114 , _phylo_periodLevels :: [PhyloLevel]
116 deriving (Generic, Show, Eq)
124 -- | PhyloLevel : levels of phylomemy on level axis
125 -- Levels description:
126 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
127 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
128 -- Level 1: First level of clustering
129 -- Level N: Nth level of clustering
131 PhyloLevel { _phylo_levelId :: PhyloLevelId
132 , _phylo_levelGroups :: [PhyloGroup]
134 deriving (Generic, Show, Eq)
142 -- | PhyloGroup : group of ngrams at each level and step
143 -- Label : maybe has a label as text
144 -- Ngrams: set of terms that build the group
145 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
146 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
147 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
148 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
150 PhyloGroup { _phylo_groupId :: PhyloGroupId
151 , _phylo_groupLabel :: Text
152 , _phylo_groupNgrams :: [Int]
153 , _phylo_groupMeta :: Map Text Double
154 , _phylo_groupBranchId :: Maybe PhyloBranchId
155 , _phylo_groupCooc :: Map (Int,Int) Double
157 , _phylo_groupPeriodParents :: [Pointer]
158 , _phylo_groupPeriodChilds :: [Pointer]
160 , _phylo_groupLevelParents :: [Pointer]
161 , _phylo_groupLevelChilds :: [Pointer]
163 deriving (Generic, Show, Eq, Ord)
166 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
168 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
172 type PhyloPeriodId = (Start, End)
173 type PhyloLevelId = (PhyloPeriodId, Level)
174 type PhyloGroupId = (PhyloLevelId, Index)
175 type PhyloBranchId = (Level, Index)
178 -- | Weight : A generic mesure that can be associated with an Id
180 -- | Pointer : A weighted linked with a given PhyloGroup
181 type Pointer = (PhyloGroupId, Weight)
182 -- | Ngrams : a contiguous sequence of n terms
191 -- | Document : a piece of Text linked to a Date
192 data Document = Document
195 } deriving (Show,Generic)
197 -- | Clique : Set of ngrams cooccurring in the same Document
198 type Clique = Set Ngrams
199 -- | Support : Number of Documents where a Clique occurs
201 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
202 data PhyloFis = PhyloFis
203 { _phyloFis_clique :: Clique
204 , _phyloFis_support :: Support
205 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
208 -- | A list of clustered PhyloGroup
209 type PhyloCluster = [PhyloGroup]
212 -- | A PhyloGroup in a Graph
213 type GroupNode = PhyloGroup
214 -- | A weighted links between two PhyloGroups in a Graph
215 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
216 -- | The association as a Graph between a list of Nodes and a list of Edges
217 type GroupGraph = ([GroupNode],[GroupEdge])
225 data PhyloError = LevelDoesNotExist
235 -- | Cluster constructors
236 data Cluster = Fis FisParams
237 | RelatedComponents RCParams
238 | Louvain LouvainParams
239 deriving (Generic, Show, Eq, Read)
241 -- | Parameters for Fis clustering
242 data FisParams = FisParams
243 { _fis_keepMinorFis :: Bool
244 , _fis_minSupport :: Support
245 , _fis_minSize :: Int
246 } deriving (Generic, Show, Eq, Read)
248 -- | Parameters for RelatedComponents clustering
249 data RCParams = RCParams
250 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
252 -- | Parameters for Louvain clustering
253 data LouvainParams = LouvainParams
254 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
262 -- | Proximity constructors
263 data Proximity = WeightedLogJaccard WLJParams
264 | Hamming HammingParams
266 deriving (Generic, Show, Eq, Read)
268 -- | Parameters for WeightedLogJaccard proximity
269 data WLJParams = WLJParams
270 { _wlj_threshold :: Double
271 , _wlj_sensibility :: Double
272 } deriving (Generic, Show, Eq, Read)
274 -- | Parameters for Hamming proximity
275 data HammingParams = HammingParams
276 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
284 -- | Filter constructors
285 data Filter = LonelyBranch LBParams
286 | SizeBranch SBParams
287 deriving (Generic, Show, Eq)
289 -- | Parameters for LonelyBranch filter
290 data LBParams = LBParams
291 { _lb_periodsInf :: Int
292 , _lb_periodsSup :: Int
293 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
295 -- | Parameters for SizeBranch filter
296 data SBParams = SBParams
297 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
305 -- | Metric constructors
306 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
314 -- | Tagger constructors
315 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
323 -- | Sort constructors
324 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
325 data Order = Asc | Desc deriving (Generic, Show, Read)
333 -- | A Phyloquery describes a phylomemic reconstruction
334 data PhyloQueryBuild = PhyloQueryBuild
335 { _q_phyloTitle :: Text
336 , _q_phyloDesc :: Text
338 -- Grain and Steps for the PhyloPeriods
339 , _q_periodGrain :: Int
340 , _q_periodSteps :: Int
342 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
343 , _q_contextualUnit :: Cluster
344 , _q_contextualUnitMetrics :: [Metric]
345 , _q_contextualUnitFilters :: [Filter]
347 -- Inter-temporal matching method of the Phylo
348 , _q_interTemporalMatching :: Proximity
350 -- Last level of reconstruction
351 , _q_nthLevel :: Level
352 -- Clustering method used from level 1 to nthLevel
353 , _q_nthCluster :: Cluster
354 } deriving (Generic, Show, Eq)
356 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
357 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
358 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
365 -- | A PhyloView is the output type of a Phylo
366 data PhyloView = PhyloView
367 { _pv_param :: PhyloParam
369 , _pv_description :: Text
370 , _pv_filiation :: Filiation
372 , _pv_periods :: [PhyloPeriodId]
373 , _pv_metrics :: Map Text [Double]
374 , _pv_branches :: [PhyloBranch]
375 , _pv_nodes :: [PhyloNode]
376 , _pv_edges :: [PhyloEdge]
377 } deriving (Generic, Show)
379 -- | A phyloview is made of PhyloBranches, edges and nodes
380 data PhyloBranch = PhyloBranch
381 { _pb_id :: PhyloBranchId
383 , _pb_metrics :: Map Text [Double]
384 } deriving (Generic, Show)
386 data PhyloEdge = PhyloEdge
387 { _pe_source :: PhyloGroupId
388 , _pe_target :: PhyloGroupId
389 , _pe_type :: EdgeType
390 , _pe_weight :: Weight
391 } deriving (Generic, Show)
393 data PhyloNode = PhyloNode
394 { _pn_id :: PhyloGroupId
395 , _pn_bid :: Maybe PhyloBranchId
398 , _pn_ngrams :: Maybe [Ngrams]
399 , _pn_metrics :: Map Text [Double]
400 , _pn_parents :: Maybe [PhyloGroupId]
401 , _pn_childs :: [PhyloNode]
402 } deriving (Generic, Show)
404 ------------------------
405 -- | PhyloQueryView | --
406 ------------------------
409 data ExportMode = Json | Dot | Svg
410 deriving (Generic, Show, Read)
411 data DisplayMode = Flat | Nested
412 deriving (Generic, Show, Read)
414 -- | A PhyloQueryView describes a Phylo as an output view
415 data PhyloQueryView = PhyloQueryView
418 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
419 , _qv_filiation :: Filiation
421 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
422 , _qv_levelChilds :: Bool
423 , _qv_levelChildsDepth :: Level
425 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
426 -- Firstly the metrics, then the filters and the taggers
427 , _qv_metrics :: [Metric]
428 , _qv_filters :: [Filter]
429 , _qv_taggers :: [Tagger]
431 -- An asc or desc sort to apply to the PhyloGraph
432 , _qv_sort :: Maybe (Sort,Order)
434 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
435 , _qv_export :: ExportMode
436 , _qv_display :: DisplayMode
437 , _qv_verbose :: Bool
446 makeLenses ''PhyloParam
447 makeLenses ''Software
450 makeLenses ''PhyloFoundations
451 makeLenses ''PhyloGroup
452 makeLenses ''PhyloLevel
453 makeLenses ''PhyloPeriod
454 makeLenses ''PhyloFis
456 makeLenses ''Proximity
460 makeLenses ''PhyloQueryBuild
461 makeLenses ''PhyloQueryView
463 makeLenses ''PhyloView
464 makeLenses ''PhyloBranch
465 makeLenses ''PhyloNode
466 makeLenses ''PhyloEdge
469 ------------------------
470 -- | JSON instances | --
471 ------------------------
474 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
475 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
476 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
477 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
478 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
479 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
481 $(deriveJSON (unPrefix "_software_" ) ''Software )
482 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
484 $(deriveJSON defaultOptions ''Filter )
485 $(deriveJSON defaultOptions ''Metric )
486 $(deriveJSON defaultOptions ''Cluster )
487 $(deriveJSON defaultOptions ''Proximity )
489 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
490 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
491 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
492 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
493 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
495 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
496 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
498 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
499 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
500 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
501 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
502 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
504 $(deriveJSON defaultOptions ''Filiation )
505 $(deriveJSON defaultOptions ''EdgeType )
508 ----------------------------
509 -- | TODO XML instances | --
510 ----------------------------