2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
52 -- | Global parameters of a Phylo
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
60 -- | Software parameters
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
82 deriving (Generic, Show, Eq)
85 -- | The foundations of a phylomemy created from a given TermList
86 data PhyloFoundations =
87 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
88 , _phylo_foundationsTermsList :: TermList
89 } deriving (Generic, Show, Eq)
92 -- | Date : a simple Integer
95 -- | UTCTime in seconds since UNIX epoch
96 -- type Start = POSIXTime
97 -- type End = POSIXTime
102 ---------------------
103 -- | PhyloPeriod | --
104 ---------------------
107 -- | PhyloStep : steps of phylomemy on temporal axis
108 -- Period: tuple (start date, end date) of the step of the phylomemy
109 -- Levels: levels of granularity
111 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
112 , _phylo_periodLevels :: [PhyloLevel]
114 deriving (Generic, Show, Eq)
122 -- | PhyloLevel : levels of phylomemy on level axis
123 -- Levels description:
124 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
125 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
126 -- Level 1: First level of clustering
127 -- Level N: Nth level of clustering
129 PhyloLevel { _phylo_levelId :: PhyloLevelId
130 , _phylo_levelGroups :: [PhyloGroup]
132 deriving (Generic, Show, Eq)
140 -- | PhyloGroup : group of ngrams at each level and step
141 -- Label : maybe has a label as text
142 -- Ngrams: set of terms that build the group
143 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
144 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
145 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
146 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
148 PhyloGroup { _phylo_groupId :: PhyloGroupId
149 , _phylo_groupLabel :: Text
150 , _phylo_groupNgrams :: [Int]
151 , _phylo_groupMeta :: Map Text Double
152 , _phylo_groupBranchId :: Maybe PhyloBranchId
154 , _phylo_groupPeriodParents :: [Pointer]
155 , _phylo_groupPeriodChilds :: [Pointer]
157 , _phylo_groupLevelParents :: [Pointer]
158 , _phylo_groupLevelChilds :: [Pointer]
160 deriving (Generic, Show, Eq, Ord)
163 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
165 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
169 type PhyloPeriodId = (Start, End)
170 type PhyloLevelId = (PhyloPeriodId, Level)
171 type PhyloGroupId = (PhyloLevelId, Index)
172 type PhyloBranchId = (Level, Index)
175 -- | Weight : A generic mesure that can be associated with an Id
177 -- | Pointer : A weighted linked with a given PhyloGroup
178 type Pointer = (PhyloGroupId, Weight)
179 -- | Ngrams : a contiguous sequence of n terms
188 -- | Document : a piece of Text linked to a Date
189 data Document = Document
192 } deriving (Show,Generic)
194 -- | Clique : Set of ngrams cooccurring in the same Document
195 type Clique = Set Ngrams
196 -- | Support : Number of Documents where a Clique occurs
198 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
199 data PhyloFis = PhyloFis
200 { _phyloFis_clique :: Clique
201 , _phyloFis_support :: Support
202 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
205 -- | A list of clustered PhyloGroup
206 type PhyloCluster = [PhyloGroup]
209 -- | A PhyloGroup in a Graph
210 type GroupNode = PhyloGroup
211 -- | A weighted links between two PhyloGroups in a Graph
212 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
213 -- | The association as a Graph between a list of Nodes and a list of Edges
214 type GroupGraph = ([GroupNode],[GroupEdge])
222 data PhyloError = LevelDoesNotExist
232 -- | Cluster constructors
233 data Cluster = Fis FisParams
234 | RelatedComponents RCParams
235 | Louvain LouvainParams
236 deriving (Generic, Show, Eq, Read)
238 -- | Parameters for Fis clustering
239 data FisParams = FisParams
240 { _fis_keepMinorFis :: Bool
241 , _fis_minSupport :: Support
242 , _fis_minSize :: Int
243 } deriving (Generic, Show, Eq, Read)
245 -- | Parameters for RelatedComponents clustering
246 data RCParams = RCParams
247 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
249 -- | Parameters for Louvain clustering
250 data LouvainParams = LouvainParams
251 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
259 -- | Proximity constructors
260 data Proximity = WeightedLogJaccard WLJParams
261 | Hamming HammingParams
263 deriving (Generic, Show, Eq, Read)
265 -- | Parameters for WeightedLogJaccard proximity
266 data WLJParams = WLJParams
267 { _wlj_threshold :: Double
268 , _wlj_sensibility :: Double
269 } deriving (Generic, Show, Eq, Read)
271 -- | Parameters for Hamming proximity
272 data HammingParams = HammingParams
273 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
281 -- | Filter constructors
282 data Filter = LonelyBranch LBParams
283 | SizeBranch SBParams
284 deriving (Generic, Show, Eq)
286 -- | Parameters for LonelyBranch filter
287 data LBParams = LBParams
288 { _lb_periodsInf :: Int
289 , _lb_periodsSup :: Int
290 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
292 -- | Parameters for SizeBranch filter
293 data SBParams = SBParams
294 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
302 -- | Metric constructors
303 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
311 -- | Tagger constructors
312 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
320 -- | Sort constructors
321 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
322 data Order = Asc | Desc deriving (Generic, Show, Read)
330 -- | A Phyloquery describes a phylomemic reconstruction
331 data PhyloQueryBuild = PhyloQueryBuild
332 { _q_phyloTitle :: Text
333 , _q_phyloDesc :: Text
335 -- Grain and Steps for the PhyloPeriods
336 , _q_periodGrain :: Int
337 , _q_periodSteps :: Int
339 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
340 , _q_contextualUnit :: Cluster
341 , _q_contextualUnitMetrics :: [Metric]
342 , _q_contextualUnitFilters :: [Filter]
344 -- Inter-temporal matching method of the Phylo
345 , _q_interTemporalMatching :: Proximity
347 -- Last level of reconstruction
348 , _q_nthLevel :: Level
349 -- Clustering method used from level 1 to nthLevel
350 , _q_nthCluster :: Cluster
351 } deriving (Generic, Show, Eq)
353 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
354 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
355 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
362 -- | A PhyloView is the output type of a Phylo
363 data PhyloView = PhyloView
364 { _pv_param :: PhyloParam
366 , _pv_description :: Text
367 , _pv_filiation :: Filiation
369 , _pv_periods :: [PhyloPeriodId]
370 , _pv_metrics :: Map Text [Double]
371 , _pv_branches :: [PhyloBranch]
372 , _pv_nodes :: [PhyloNode]
373 , _pv_edges :: [PhyloEdge]
374 } deriving (Generic, Show)
376 -- | A phyloview is made of PhyloBranches, edges and nodes
377 data PhyloBranch = PhyloBranch
378 { _pb_id :: PhyloBranchId
380 , _pb_metrics :: Map Text [Double]
381 } deriving (Generic, Show)
383 data PhyloEdge = PhyloEdge
384 { _pe_source :: PhyloGroupId
385 , _pe_target :: PhyloGroupId
386 , _pe_type :: EdgeType
387 , _pe_weight :: Weight
388 } deriving (Generic, Show)
390 data PhyloNode = PhyloNode
391 { _pn_id :: PhyloGroupId
392 , _pn_bid :: Maybe PhyloBranchId
395 , _pn_ngrams :: Maybe [Ngrams]
396 , _pn_metrics :: Map Text [Double]
397 , _pn_parents :: Maybe [PhyloGroupId]
398 , _pn_childs :: [PhyloNode]
399 } deriving (Generic, Show)
401 ------------------------
402 -- | PhyloQueryView | --
403 ------------------------
406 data ExportMode = Json | Dot | Svg
407 deriving (Generic, Show, Read)
408 data DisplayMode = Flat | Nested
409 deriving (Generic, Show, Read)
411 -- | A PhyloQueryView describes a Phylo as an output view
412 data PhyloQueryView = PhyloQueryView
415 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
416 , _qv_filiation :: Filiation
418 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
419 , _qv_levelChilds :: Bool
420 , _qv_levelChildsDepth :: Level
422 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
423 -- Firstly the metrics, then the filters and the taggers
424 , _qv_metrics :: [Metric]
425 , _qv_filters :: [Filter]
426 , _qv_taggers :: [Tagger]
428 -- An asc or desc sort to apply to the PhyloGraph
429 , _qv_sort :: Maybe (Sort,Order)
431 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
432 , _qv_export :: ExportMode
433 , _qv_display :: DisplayMode
434 , _qv_verbose :: Bool
443 makeLenses ''PhyloParam
444 makeLenses ''Software
447 makeLenses ''PhyloFoundations
448 makeLenses ''PhyloGroup
449 makeLenses ''PhyloLevel
450 makeLenses ''PhyloPeriod
451 makeLenses ''PhyloFis
453 makeLenses ''Proximity
457 makeLenses ''PhyloQueryBuild
458 makeLenses ''PhyloQueryView
460 makeLenses ''PhyloView
461 makeLenses ''PhyloBranch
462 makeLenses ''PhyloNode
463 makeLenses ''PhyloEdge
466 ------------------------
467 -- | JSON instances | --
468 ------------------------
471 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
472 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
473 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
474 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
475 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
476 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
478 $(deriveJSON (unPrefix "_software_" ) ''Software )
479 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
481 $(deriveJSON defaultOptions ''Filter )
482 $(deriveJSON defaultOptions ''Metric )
483 $(deriveJSON defaultOptions ''Cluster )
484 $(deriveJSON defaultOptions ''Proximity )
486 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
487 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
488 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
489 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
490 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
492 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
493 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
495 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
496 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
497 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
498 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
499 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
501 $(deriveJSON defaultOptions ''Filiation )
502 $(deriveJSON defaultOptions ''EdgeType )
505 ----------------------------
506 -- | TODO XML instances | --
507 ----------------------------