2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
51 -- | Global parameters of a Phylo
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQuery
56 } deriving (Generic, Show)
59 -- | Software parameters
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show)
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
74 -- Periods : list of all the periods of a Phylo
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 , _phylo_periods :: [PhyloPeriod]
79 , _phylo_param :: PhyloParam
81 deriving (Generic, Show)
84 -- | Date : a simple Integer
87 -- | UTCTime in seconds since UNIX epoch
88 -- type Start = POSIXTime
89 -- type End = POSIXTime
99 -- | PhyloStep : steps of phylomemy on temporal axis
100 -- Period: tuple (start date, end date) of the step of the phylomemy
101 -- Levels: levels of granularity
103 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
104 , _phylo_periodLevels :: [PhyloLevel]
106 deriving (Generic, Show)
114 -- | PhyloLevel : levels of phylomemy on level axis
115 -- Levels description:
116 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
117 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
118 -- Level 1: First level of clustering
119 -- Level N: Nth level of clustering
121 PhyloLevel { _phylo_levelId :: PhyloLevelId
122 , _phylo_levelGroups :: [PhyloGroup]
124 deriving (Generic, Show)
132 -- | PhyloGroup : group of ngrams at each level and step
133 -- Label : maybe has a label as text
134 -- Ngrams: set of terms that build the group
135 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
136 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
137 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
138 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
140 PhyloGroup { _phylo_groupId :: PhyloGroupId
141 , _phylo_groupLabel :: Text
142 , _phylo_groupNgrams :: [Int]
143 , _phylo_groupMeta :: Map Text Double
144 , _phylo_groupCooc :: Map (Int, Int) Double
145 , _phylo_groupBranchId :: Maybe PhyloBranchId
147 , _phylo_groupPeriodParents :: [Pointer]
148 , _phylo_groupPeriodChilds :: [Pointer]
150 , _phylo_groupLevelParents :: [Pointer]
151 , _phylo_groupLevelChilds :: [Pointer]
153 deriving (Generic, Show, Eq, Ord)
156 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
158 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
162 type PhyloPeriodId = (Start, End)
163 type PhyloLevelId = (PhyloPeriodId, Level)
164 type PhyloGroupId = (PhyloLevelId, Index)
165 type PhyloBranchId = (Level, Index)
168 -- | Weight : A generic mesure that can be associated with an Id
170 -- | Pointer : A weighted linked with a given PhyloGroup
171 type Pointer = (PhyloGroupId, Weight)
172 -- | Ngrams : a contiguous sequence of n terms
181 -- | Document : a piece of Text linked to a Date
182 data Document = Document
188 -- | Clique : Set of ngrams cooccurring in the same Document
189 type Clique = Set Ngrams
190 -- | Support : Number of Documents where a Clique occurs
192 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
193 type PhyloFis = (Clique,Support)
196 -- | A list of clustered PhyloGroup
197 type PhyloCluster = [PhyloGroup]
200 -- | A List of PhyloGroup in a Graph
201 type GroupNodes = [PhyloGroup]
202 -- | A List of weighted links between some PhyloGroups in a Graph
203 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
204 -- | The association as a Graph between a list of Nodes and a list of Edges
205 type GroupGraph = (GroupNodes,GroupEdges)
213 data PhyloError = LevelDoesNotExist
223 -- | Cluster constructors
224 data Cluster = Fis FisParams
225 | RelatedComponents RCParams
226 | Louvain LouvainParams
229 -- | Parameters for Fis clustering
230 data FisParams = FisParams
231 { _fis_filtered :: Bool
232 , _fis_keepMinorFis :: Bool
233 , _fis_minSupport :: Support
236 -- | Parameters for RelatedComponents clustering
237 data RCParams = RCParams
238 { _rc_proximity :: Proximity } deriving (Show)
240 -- | Parameters for Louvain clustering
241 data LouvainParams = LouvainParams
242 { _louvain_proximity :: Proximity } deriving (Show)
250 -- | Proximity constructors
251 data Proximity = WeightedLogJaccard WLJParams
252 | Hamming HammingParams
256 -- | Parameters for WeightedLogJaccard proximity
257 data WLJParams = WLJParams
258 { _wlj_threshold :: Double
259 , _wlj_sensibility :: Double
262 -- | Parameters for Hamming proximity
263 data HammingParams = HammingParams
264 { _hamming_threshold :: Double } deriving (Show)
272 -- | Filter constructors
273 data Filter = LonelyBranch LBParams deriving (Show)
275 -- | Parameters for LonelyBranch filter
276 data LBParams = LBParams
277 { _lb_periodsInf :: Int
278 , _lb_periodsSup :: Int
279 , _lb_minNodes :: Int } deriving (Show)
287 -- | Metric constructors
288 data Metric = BranchAge deriving (Show)
296 -- | Tagger constructors
297 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
305 -- | Sort constructors
306 data Sort = ByBranchAge deriving (Show)
307 data Order = Asc | Desc deriving (Show)
315 -- | A Phyloquery describes a phylomemic reconstruction
316 data PhyloQuery = PhyloQuery
317 { _q_phyloTitle :: Text
318 , _q_phyloDesc :: Text
320 -- Grain and Steps for the PhyloPeriods
321 , _q_periodGrain :: Int
322 , _q_periodSteps :: Int
324 -- Clustering method for making level 1 of the Phylo
325 , _q_cluster :: Cluster
327 -- Inter-temporal matching method of the Phylo
328 , _q_interTemporalMatching :: Proximity
330 -- Last level of reconstruction
331 , _q_nthLevel :: Level
332 -- Clustering method used from level 1 to nthLevel
333 , _q_nthCluster :: Cluster
336 data Filiation = Ascendant | Descendant | Complete deriving (Show)
337 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
345 -- | A PhyloView is the output type of a Phylo
346 data PhyloView = PhyloView
347 { _phylo_viewParam :: PhyloParam
348 , _phylo_viewTitle :: Text
349 , _phylo_viewDescription :: Text
350 , _phylo_viewFiliation :: Filiation
351 , _phylo_viewMeta :: Map Text Double
352 , _phylo_viewBranches :: [PhyloBranch]
353 , _phylo_viewNodes :: [PhyloNode]
354 , _phylo_viewEdges :: [PhyloEdge]
357 -- | A phyloview is made of PhyloBranches, edges and nodes
358 data PhyloBranch = PhyloBranch
359 { _phylo_branchId :: PhyloBranchId
360 , _phylo_branchLabel :: Text
361 , _phylo_branchMeta :: Map Text Double
364 data PhyloEdge = PhyloEdge
365 { _phylo_edgeSource :: PhyloGroupId
366 , _phylo_edgeTarget :: PhyloGroupId
367 , _phylo_edgeType :: EdgeType
368 , _phylo_edgeWeight :: Weight
371 data PhyloNode = PhyloNode
372 { _phylo_nodeId :: PhyloGroupId
373 , _phylo_nodeBranchId :: Maybe PhyloBranchId
374 , _phylo_nodeLabel :: Text
375 , _phylo_nodeNgramsIdx :: [Int]
376 , _phylo_nodeNgrams :: Maybe [Ngrams]
377 , _phylo_nodeMeta :: Map Text Double
378 , _phylo_nodeParent :: Maybe PhyloGroupId
379 , _phylo_nodeChilds :: [PhyloNode]
383 ------------------------
384 -- | PhyloQueryView | --
385 ------------------------
388 data DisplayMode = Flat | Nested
390 -- | A PhyloQueryView describes a Phylo as an output view
391 data PhyloQueryView = PhyloQueryView
394 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
395 , _qv_filiation :: Filiation
397 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
399 , _qv_childsDepth :: Level
401 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
402 -- Firstly the metrics, then the filters and the taggers
403 , _qv_metrics :: [Metric]
404 , _qv_filters :: [Filter]
405 , _qv_taggers :: [Tagger]
407 -- An asc or desc sort to apply to the PhyloGraph
408 , _qv_sort :: Maybe (Sort,Order)
410 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
411 , _qv_display :: DisplayMode
412 , _qv_verbose :: Bool
421 makeLenses ''PhyloParam
422 makeLenses ''Software
425 makeLenses ''PhyloGroup
426 makeLenses ''PhyloLevel
427 makeLenses ''PhyloPeriod
429 makeLenses ''Proximity
433 makeLenses ''PhyloQuery
434 makeLenses ''PhyloQueryView
436 makeLenses ''PhyloView
437 makeLenses ''PhyloBranch
438 makeLenses ''PhyloNode
439 makeLenses ''PhyloEdge
442 ------------------------
443 -- | JSON instances | --
444 ------------------------
447 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
448 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
449 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
450 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
452 $(deriveJSON (unPrefix "_software_" ) ''Software )
453 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
455 $(deriveJSON defaultOptions ''Cluster )
456 $(deriveJSON defaultOptions ''Proximity )
458 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
459 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
460 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
461 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
462 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
464 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
467 ----------------------------
468 -- | TODO XML instances | --
469 ----------------------------