2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
51 -- | Global parameters of a Phylo
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQuery
56 } deriving (Generic, Show)
59 -- | Software parameters
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show)
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
74 -- Periods : list of all the periods of a Phylo
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 -- , _phylo_peaks :: PhyloPeaks
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
82 deriving (Generic, Show)
85 -- PhyloPeaks { _phylo_peaksLabel :: Vector Ngrams
86 -- , _phylo_peaksTrees :: [(Ngrams, TreeNgrams)]
88 -- deriving (Generic, Show)
91 -- | Date : a simple Integer
94 -- | UTCTime in seconds since UNIX epoch
95 -- type Start = POSIXTime
96 -- type End = POSIXTime
101 ---------------------
102 -- | PhyloPeriod | --
103 ---------------------
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
110 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
111 , _phylo_periodLevels :: [PhyloLevel]
113 deriving (Generic, Show)
121 -- | PhyloLevel : levels of phylomemy on level axis
122 -- Levels description:
123 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
124 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
125 -- Level 1: First level of clustering
126 -- Level N: Nth level of clustering
128 PhyloLevel { _phylo_levelId :: PhyloLevelId
129 , _phylo_levelGroups :: [PhyloGroup]
131 deriving (Generic, Show)
139 -- | PhyloGroup : group of ngrams at each level and step
140 -- Label : maybe has a label as text
141 -- Ngrams: set of terms that build the group
142 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
143 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
144 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
145 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
147 PhyloGroup { _phylo_groupId :: PhyloGroupId
148 , _phylo_groupLabel :: Text
149 , _phylo_groupNgrams :: [Int]
150 , _phylo_groupMeta :: Map Text Double
151 , _phylo_groupCooc :: Map (Int, Int) Double
152 , _phylo_groupBranchId :: Maybe PhyloBranchId
154 , _phylo_groupPeriodParents :: [Pointer]
155 , _phylo_groupPeriodChilds :: [Pointer]
157 , _phylo_groupLevelParents :: [Pointer]
158 , _phylo_groupLevelChilds :: [Pointer]
160 deriving (Generic, Show, Eq, Ord)
163 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
165 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
169 type PhyloPeriodId = (Start, End)
170 type PhyloLevelId = (PhyloPeriodId, Level)
171 type PhyloGroupId = (PhyloLevelId, Index)
172 type PhyloBranchId = (Level, Index)
175 -- | Weight : A generic mesure that can be associated with an Id
177 -- | Pointer : A weighted linked with a given PhyloGroup
178 type Pointer = (PhyloGroupId, Weight)
179 -- | Ngrams : a contiguous sequence of n terms
188 -- | Document : a piece of Text linked to a Date
189 data Document = Document
195 -- | Clique : Set of ngrams cooccurring in the same Document
196 type Clique = Set Ngrams
197 -- | Support : Number of Documents where a Clique occurs
199 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
200 type PhyloFis = (Clique,Support)
203 -- | A list of clustered PhyloGroup
204 type PhyloCluster = [PhyloGroup]
207 -- | A List of PhyloGroup in a Graph
208 type GroupNodes = [PhyloGroup]
209 -- | A List of weighted links between some PhyloGroups in a Graph
210 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
211 -- | The association as a Graph between a list of Nodes and a list of Edges
212 type GroupGraph = (GroupNodes,GroupEdges)
220 data PhyloError = LevelDoesNotExist
230 -- | Cluster constructors
231 data Cluster = Fis FisParams
232 | RelatedComponents RCParams
233 | Louvain LouvainParams
236 -- | Parameters for Fis clustering
237 data FisParams = FisParams
238 { _fis_filtered :: Bool
239 , _fis_keepMinorFis :: Bool
240 , _fis_minSupport :: Support
243 -- | Parameters for RelatedComponents clustering
244 data RCParams = RCParams
245 { _rc_proximity :: Proximity } deriving (Show)
247 -- | Parameters for Louvain clustering
248 data LouvainParams = LouvainParams
249 { _louvain_proximity :: Proximity } deriving (Show)
257 -- | Proximity constructors
258 data Proximity = WeightedLogJaccard WLJParams
259 | Hamming HammingParams
263 -- | Parameters for WeightedLogJaccard proximity
264 data WLJParams = WLJParams
265 { _wlj_threshold :: Double
266 , _wlj_sensibility :: Double
269 -- | Parameters for Hamming proximity
270 data HammingParams = HammingParams
271 { _hamming_threshold :: Double } deriving (Show)
279 -- | Filter constructors
280 data Filter = SmallBranch SBParams deriving (Show)
282 -- | Parameters for SmallBranch filter
283 data SBParams = SBParams
284 { _sb_periodsInf :: Int
285 , _sb_periodsSup :: Int
286 , _sb_minNodes :: Int } deriving (Show)
294 -- | Metric constructors
295 data Metric = BranchAge deriving (Show)
303 -- | Tagger constructors
304 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
312 -- | Sort constructors
313 data Sort = ByBranchAge deriving (Show)
314 data Order = Asc | Desc deriving (Show)
322 -- | A Phyloquery describes a phylomemic reconstruction
323 data PhyloQuery = PhyloQuery
324 { _q_phyloTitle :: Text
325 , _q_phyloDesc :: Text
327 -- Grain and Steps for the PhyloPeriods
328 , _q_periodGrain :: Int
329 , _q_periodSteps :: Int
331 -- Clustering method for building the contextual unit of Phylo (ie: level 1)
332 , _q_contextualUnit :: Cluster
334 -- Inter-temporal matching method of the Phylo
335 , _q_interTemporalMatching :: Proximity
337 -- Last level of reconstruction
338 , _q_nthLevel :: Level
339 -- Clustering method used from level 1 to nthLevel
340 , _q_nthCluster :: Cluster
343 -- | To choose the Phylo edge you want to export : --> <-- <--> <=>
344 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Show)
345 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
353 -- | A PhyloView is the output type of a Phylo
354 data PhyloView = PhyloView
355 { _phylo_viewParam :: PhyloParam
356 , _phylo_viewTitle :: Text
357 , _phylo_viewDescription :: Text
358 , _phylo_viewFiliation :: Filiation
359 , _phylo_viewMetrics :: Map Text [Double]
360 , _phylo_viewBranches :: [PhyloBranch]
361 , _phylo_viewNodes :: [PhyloNode]
362 , _phylo_viewEdges :: [PhyloEdge]
365 -- | A phyloview is made of PhyloBranches, edges and nodes
366 data PhyloBranch = PhyloBranch
367 { _phylo_branchId :: PhyloBranchId
368 , _phylo_branchLabel :: Text
369 , _phylo_branchMetrics :: Map Text [Double]
372 data PhyloEdge = PhyloEdge
373 { _phylo_edgeSource :: PhyloGroupId
374 , _phylo_edgeTarget :: PhyloGroupId
375 , _phylo_edgeType :: EdgeType
376 , _phylo_edgeWeight :: Weight
379 data PhyloNode = PhyloNode
380 { _phylo_nodeId :: PhyloGroupId
381 , _phylo_nodeBranchId :: Maybe PhyloBranchId
382 , _phylo_nodeLabel :: Text
383 , _phylo_nodeNgramsIdx :: [Int]
384 , _phylo_nodeNgrams :: Maybe [Ngrams]
385 , _phylo_nodeMetrics :: Map Text [Double]
386 , _phylo_nodeLevelParents :: Maybe [PhyloGroupId]
387 , _phylo_nodeLevelChilds :: [PhyloNode]
391 ------------------------
392 -- | PhyloQueryView | --
393 ------------------------
396 data DisplayMode = Flat | Nested
398 -- | A PhyloQueryView describes a Phylo as an output view
399 data PhyloQueryView = PhyloQueryView
402 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
403 , _qv_filiation :: Filiation
405 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
406 , _qv_levelChilds :: Bool
407 , _qv_levelChildsDepth :: Level
409 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
410 -- Firstly the metrics, then the filters and the taggers
411 , _qv_metrics :: [Metric]
412 , _qv_filters :: [Filter]
413 , _qv_taggers :: [Tagger]
415 -- An asc or desc sort to apply to the PhyloGraph
416 , _qv_sort :: Maybe (Sort,Order)
418 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
419 , _qv_display :: DisplayMode
420 , _qv_verbose :: Bool
429 makeLenses ''PhyloParam
430 makeLenses ''Software
433 makeLenses ''PhyloGroup
434 makeLenses ''PhyloLevel
435 makeLenses ''PhyloPeriod
437 makeLenses ''Proximity
441 makeLenses ''PhyloQuery
442 makeLenses ''PhyloQueryView
444 makeLenses ''PhyloView
445 makeLenses ''PhyloBranch
446 makeLenses ''PhyloNode
447 makeLenses ''PhyloEdge
450 ------------------------
451 -- | JSON instances | --
452 ------------------------
455 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
456 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
457 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
458 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
460 $(deriveJSON (unPrefix "_software_" ) ''Software )
461 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
463 $(deriveJSON defaultOptions ''Cluster )
464 $(deriveJSON defaultOptions ''Proximity )
466 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
467 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
468 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
469 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
470 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
472 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
475 ----------------------------
476 -- | TODO XML instances | --
477 ----------------------------