2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Prelude (Bounded)
33 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON,defaultOptions)
35 import Data.Maybe (Maybe)
36 import Data.Text (Text)
39 import Data.Vector (Vector)
40 --import Data.Time.Clock.POSIX (POSIXTime)
41 import GHC.Generics (Generic)
42 --import Gargantext.Database.Schema.Ngrams (NgramsId)
43 import Gargantext.Core.Utils.Prefix (unPrefix)
44 import Gargantext.Text.Context (TermList)
45 import Gargantext.Prelude
52 -- | Global parameters of a Phylo
54 PhyloParam { _phyloParam_version :: Text -- Double ?
55 , _phyloParam_software :: Software
56 , _phyloParam_query :: PhyloQueryBuild
57 } deriving (Generic, Show, Eq)
60 -- | Software parameters
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic, Show, Eq)
72 -- | Phylo datatype of a phylomemy
73 -- Duration : time Segment of the whole Phylo
74 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
75 -- Periods : list of all the periods of a Phylo
77 Phylo { _phylo_duration :: (Start, End)
78 , _phylo_foundations :: PhyloFoundations
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_docsByYears :: Map Date Double
81 , _phylo_cooc :: Map Date (Map (Int,Int) Double)
82 , _phylo_fis :: Map (Date,Date) [PhyloFis]
83 , _phylo_param :: PhyloParam
85 deriving (Generic, Show, Eq)
88 -- | The foundations of a phylomemy created from a given TermList
89 data PhyloFoundations =
90 PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
91 , _phylo_foundationsTermsList :: TermList
92 } deriving (Generic, Show, Eq)
95 -- | Date : a simple Integer
98 -- | UTCTime in seconds since UNIX epoch
99 -- type Start = POSIXTime
100 -- type End = POSIXTime
105 ---------------------
106 -- | PhyloPeriod | --
107 ---------------------
110 -- | PhyloStep : steps of phylomemy on temporal axis
111 -- Period: tuple (start date, end date) of the step of the phylomemy
112 -- Levels: levels of granularity
114 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
115 , _phylo_periodLevels :: [PhyloLevel]
117 deriving (Generic, Show, Eq)
125 -- | PhyloLevel : levels of phylomemy on level axis
126 -- Levels description:
127 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
128 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
129 -- Level 1: First level of clustering
130 -- Level N: Nth level of clustering
132 PhyloLevel { _phylo_levelId :: PhyloLevelId
133 , _phylo_levelGroups :: [PhyloGroup]
135 deriving (Generic, Show, Eq)
143 -- | PhyloGroup : group of ngrams at each level and step
144 -- Label : maybe has a label as text
145 -- Ngrams: set of terms that build the group
146 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
147 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
148 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
149 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
151 PhyloGroup { _phylo_groupId :: PhyloGroupId
152 , _phylo_groupLabel :: Text
153 , _phylo_groupNgrams :: [Int]
154 , _phylo_groupMeta :: Map Text Double
155 , _phylo_groupBranchId :: Maybe PhyloBranchId
156 , _phylo_groupCooc :: Map (Int,Int) Double
158 , _phylo_groupPeriodParents :: [Pointer]
159 , _phylo_groupPeriodChilds :: [Pointer]
161 , _phylo_groupLevelParents :: [Pointer]
162 , _phylo_groupLevelChilds :: [Pointer]
164 deriving (Generic, Show, Eq, Ord)
167 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
169 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
173 type PhyloPeriodId = (Start, End)
174 type PhyloLevelId = (PhyloPeriodId, Level)
175 type PhyloGroupId = (PhyloLevelId, Index)
176 type PhyloBranchId = (Level, Index)
179 -- | Weight : A generic mesure that can be associated with an Id
181 -- | Pointer : A weighted linked with a given PhyloGroup
182 type Pointer = (PhyloGroupId, Weight)
183 -- | Ngrams : a contiguous sequence of n terms
192 -- | Document : a piece of Text linked to a Date
193 data Document = Document
196 } deriving (Show,Generic)
198 -- | Clique : Set of ngrams cooccurring in the same Document
199 type Clique = Set Ngrams
200 -- | Support : Number of Documents where a Clique occurs
202 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
203 data PhyloFis = PhyloFis
204 { _phyloFis_clique :: Clique
205 , _phyloFis_support :: Support
206 , _phyloFis_period :: (Date,Date)
207 } deriving (Generic,Show,Eq)
209 -- | A list of clustered PhyloGroup
210 type PhyloCluster = [PhyloGroup]
213 -- | A PhyloGroup in a Graph
214 type GroupNode = PhyloGroup
215 -- | A weighted links between two PhyloGroups in a Graph
216 type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
217 -- | The association as a Graph between a list of Nodes and a list of Edges
218 type GroupGraph = ([GroupNode],[GroupEdge])
226 data PhyloError = LevelDoesNotExist
236 -- | Cluster constructors
237 data Cluster = Fis FisParams
238 | RelatedComponents RCParams
239 | Louvain LouvainParams
240 deriving (Generic, Show, Eq, Read)
242 -- | Parameters for Fis clustering
243 data FisParams = FisParams
244 { _fis_keepMinorFis :: Bool
245 , _fis_minSupport :: Support
246 , _fis_minSize :: Int
247 } deriving (Generic, Show, Eq, Read)
249 -- | Parameters for RelatedComponents clustering
250 data RCParams = RCParams
251 { _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
253 -- | Parameters for Louvain clustering
254 data LouvainParams = LouvainParams
255 { _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
263 -- | Proximity constructors
264 data Proximity = WeightedLogJaccard WLJParams
265 | Hamming HammingParams
267 deriving (Generic, Show, Eq, Read)
269 -- | Parameters for WeightedLogJaccard proximity
270 data WLJParams = WLJParams
271 { _wlj_threshold :: Double
272 , _wlj_sensibility :: Double
273 } deriving (Generic, Show, Eq, Read)
275 -- | Parameters for Hamming proximity
276 data HammingParams = HammingParams
277 { _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
285 -- | Filter constructors
286 data Filter = LonelyBranch LBParams
287 | SizeBranch SBParams
288 deriving (Generic, Show, Eq)
290 -- | Parameters for LonelyBranch filter
291 data LBParams = LBParams
292 { _lb_periodsInf :: Int
293 , _lb_periodsSup :: Int
294 , _lb_minNodes :: Int } deriving (Generic, Show, Eq)
296 -- | Parameters for SizeBranch filter
297 data SBParams = SBParams
298 { _sb_minSize :: Int } deriving (Generic, Show, Eq)
306 -- | Metric constructors
307 data Metric = BranchAge deriving (Generic, Show, Eq, Read)
315 -- | Tagger constructors
316 data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
324 -- | Sort constructors
325 data Sort = ByBranchAge deriving (Generic, Show, Read, Enum, Bounded)
326 data Order = Asc | Desc deriving (Generic, Show, Read)
334 -- | A Phyloquery describes a phylomemic reconstruction
335 data PhyloQueryBuild = PhyloQueryBuild
336 { _q_phyloTitle :: Text
337 , _q_phyloDesc :: Text
339 -- Grain and Steps for the PhyloPeriods
340 , _q_periodGrain :: Int
341 , _q_periodSteps :: Int
343 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
344 , _q_contextualUnit :: Cluster
345 , _q_contextualUnitMetrics :: [Metric]
346 , _q_contextualUnitFilters :: [Filter]
348 -- Inter-temporal matching method of the Phylo
349 , _q_interTemporalMatching :: Proximity
351 -- Last level of reconstruction
352 , _q_nthLevel :: Level
353 -- Clustering method used from level 1 to nthLevel
354 , _q_nthCluster :: Cluster
355 } deriving (Generic, Show, Eq)
357 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
358 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
359 data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
366 -- | A PhyloView is the output type of a Phylo
367 data PhyloView = PhyloView
368 { _pv_param :: PhyloParam
370 , _pv_description :: Text
371 , _pv_filiation :: Filiation
373 , _pv_periods :: [PhyloPeriodId]
374 , _pv_metrics :: Map Text [Double]
375 , _pv_branches :: [PhyloBranch]
376 , _pv_nodes :: [PhyloNode]
377 , _pv_edges :: [PhyloEdge]
378 } deriving (Generic, Show)
380 -- | A phyloview is made of PhyloBranches, edges and nodes
381 data PhyloBranch = PhyloBranch
382 { _pb_id :: PhyloBranchId
384 , _pb_metrics :: Map Text [Double]
385 } deriving (Generic, Show)
387 data PhyloEdge = PhyloEdge
388 { _pe_source :: PhyloGroupId
389 , _pe_target :: PhyloGroupId
390 , _pe_type :: EdgeType
391 , _pe_weight :: Weight
392 } deriving (Generic, Show)
394 data PhyloNode = PhyloNode
395 { _pn_id :: PhyloGroupId
396 , _pn_bid :: Maybe PhyloBranchId
399 , _pn_ngrams :: Maybe [Ngrams]
400 , _pn_metrics :: Map Text [Double]
401 , _pn_parents :: Maybe [PhyloGroupId]
402 , _pn_childs :: [PhyloNode]
403 } deriving (Generic, Show)
405 ------------------------
406 -- | PhyloQueryView | --
407 ------------------------
410 data ExportMode = Json | Dot | Svg
411 deriving (Generic, Show, Read)
412 data DisplayMode = Flat | Nested
413 deriving (Generic, Show, Read)
415 -- | A PhyloQueryView describes a Phylo as an output view
416 data PhyloQueryView = PhyloQueryView
419 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
420 , _qv_filiation :: Filiation
422 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
423 , _qv_levelChilds :: Bool
424 , _qv_levelChildsDepth :: Level
426 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
427 -- Firstly the metrics, then the filters and the taggers
428 , _qv_metrics :: [Metric]
429 , _qv_filters :: [Filter]
430 , _qv_taggers :: [Tagger]
432 -- An asc or desc sort to apply to the PhyloGraph
433 , _qv_sort :: Maybe (Sort,Order)
435 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
436 , _qv_export :: ExportMode
437 , _qv_display :: DisplayMode
438 , _qv_verbose :: Bool
447 makeLenses ''PhyloParam
448 makeLenses ''Software
451 makeLenses ''PhyloFoundations
452 makeLenses ''PhyloGroup
453 makeLenses ''PhyloLevel
454 makeLenses ''PhyloPeriod
455 makeLenses ''PhyloFis
457 makeLenses ''Proximity
461 makeLenses ''PhyloQueryBuild
462 makeLenses ''PhyloQueryView
464 makeLenses ''PhyloView
465 makeLenses ''PhyloBranch
466 makeLenses ''PhyloNode
467 makeLenses ''PhyloEdge
470 ------------------------
471 -- | JSON instances | --
472 ------------------------
475 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
476 $(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
477 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
478 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
479 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
480 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
482 $(deriveJSON (unPrefix "_software_" ) ''Software )
483 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
485 $(deriveJSON defaultOptions ''Filter )
486 $(deriveJSON defaultOptions ''Metric )
487 $(deriveJSON defaultOptions ''Cluster )
488 $(deriveJSON defaultOptions ''Proximity )
490 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
491 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
492 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
493 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
494 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
496 $(deriveJSON (unPrefix "_lb_" ) ''LBParams )
497 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
499 $(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
500 $(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
501 $(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
502 $(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
503 $(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
505 $(deriveJSON defaultOptions ''Filiation )
506 $(deriveJSON defaultOptions ''EdgeType )
509 ----------------------------
510 -- | TODO XML instances | --
511 ----------------------------