2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 --import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 --import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
51 -- | Global parameters of a Phylo
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_query :: PhyloQuery
56 } deriving (Generic, Show, Eq)
59 -- | Software parameters
61 Software { _software_name :: Text
62 , _software_version :: Text
63 } deriving (Generic, Show, Eq)
71 -- | Phylo datatype of a phylomemy
72 -- Duration : time Segment of the whole Phylo
73 -- Foundations : vector of all the Ngrams contained in a Phylo (built from a list of actants)
74 -- Periods : list of all the periods of a Phylo
76 Phylo { _phylo_duration :: (Start, End)
77 , _phylo_foundations :: Vector Ngrams
78 , _phylo_foundationsPeaks :: PhyloPeaks
79 , _phylo_periods :: [PhyloPeriod]
80 , _phylo_param :: PhyloParam
82 deriving (Generic, Show, Eq)
84 -- | The PhyloPeaks describe the aggregation of some foundations Ngrams behind a list of Ngrams trees (ie: a forest)
85 -- PeaksLabels are the root labels of each Ngrams tree
87 PhyloPeaks { _phylo_peaksLabels :: Vector Ngrams
88 , _phylo_peaksForest :: [Tree Ngrams]
90 deriving (Generic, Show, Eq)
92 -- | A Tree is either 'Empty' or a 'Node' holding a label of type @a@ and a list of sub-trees (used as @Tree Ngrams@ in the PhyloPeaks forest)
93 data Tree a = Empty | Node a [Tree a] deriving (Show, Eq)
96 -- | Date : a simple Integer
99 -- | UTCTime in seconds since UNIX epoch
100 -- type Start = POSIXTime
101 -- type End = POSIXTime
106 ---------------------
107 -- | PhyloPeriod | --
108 ---------------------
111 -- | PhyloPeriod : steps of phylomemy on temporal axis
112 -- Period: tuple (start date, end date) of the step of the phylomemy
113 -- Levels: levels of granularity
115 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
116 , _phylo_periodLevels :: [PhyloLevel]
118 deriving (Generic, Show, Eq)
126 -- | PhyloLevel : levels of phylomemy on level axis
127 -- Levels description:
128 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
129 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
130 -- Level 1: First level of clustering
131 -- Level N: Nth level of clustering
133 PhyloLevel { _phylo_levelId :: PhyloLevelId
134 , _phylo_levelGroups :: [PhyloGroup]
136 deriving (Generic, Show, Eq)
144 -- | PhyloGroup : group of ngrams at each level and step
145 -- Label : maybe has a label as text
146 -- Ngrams: set of terms that build the group
147 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
148 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
149 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
150 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
152 PhyloGroup { _phylo_groupId :: PhyloGroupId
153 , _phylo_groupLabel :: Text
154 , _phylo_groupNgrams :: [Int]
155 , _phylo_groupMeta :: Map Text Double
156 , _phylo_groupCooc :: Map (Int, Int) Double
157 , _phylo_groupBranchId :: Maybe PhyloBranchId
159 , _phylo_groupPeriodParents :: [Pointer]
160 , _phylo_groupPeriodChilds :: [Pointer]
162 , _phylo_groupLevelParents :: [Pointer]
163 , _phylo_groupLevelChilds :: [Pointer]
165 deriving (Generic, Show, Eq, Ord)
168 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
170 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
174 type PhyloPeriodId = (Start, End) -- a period is identified by its (Start, End) bounds
175 type PhyloLevelId = (PhyloPeriodId, Level) -- a level is identified by its period id and a Level
176 type PhyloGroupId = (PhyloLevelId, Index) -- a group is identified by its level id and an Index
177 type PhyloBranchId = (Level, Index) -- a branch is identified by a Level and an Index (no period component)
180 -- | Weight : A generic measure that can be associated with an Id
182 -- | Pointer : a weighted link to a given PhyloGroup, i.e. the PhyloGroupId of the linked group paired with a Weight
183 type Pointer = (PhyloGroupId, Weight)
184 -- | Ngrams : a contiguous sequence of n terms
193 -- | Document : a piece of Text linked to a Date
194 data Document = Document
199 -- | Clique : a Set of Ngrams co-occurring in the same Document
200 type Clique = Set Ngrams
201 -- | Support : Number of Documents where a Clique occurs
203 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
204 data PhyloFis = PhyloFis
205 { _phyloFis_clique :: Clique
206 , _phyloFis_support :: Support
207 , _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
210 -- | PhyloCluster : a list of PhyloGroups that have been clustered together
211 type PhyloCluster = [PhyloGroup]
214 -- | GroupNodes : the PhyloGroups used as the nodes of a Graph
215 type GroupNodes = [PhyloGroup]
216 -- | GroupEdges : the weighted links of a Graph, each one pairing two PhyloGroups with a Weight
217 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
218 -- | GroupGraph : a Graph given as the pair (GroupNodes, GroupEdges)
219 type GroupGraph = (GroupNodes,GroupEdges)
227 data PhyloError = LevelDoesNotExist
237 -- | Cluster constructors
238 data Cluster = Fis FisParams
239 | RelatedComponents RCParams
240 | Louvain LouvainParams
243 -- | Parameters for Fis clustering (payload of the Fis constructor of Cluster); the "_fis_" field prefix is the one handed to unPrefix in the JSON derivation
244 data FisParams = FisParams
245 { _fis_keepMinorFis :: Bool
246 , _fis_minSupport :: Support -- presumably the minimal Support a Fis must reach to be kept — TODO confirm
247 } deriving (Show, Eq)
249 -- | Parameters for RelatedComponents clustering (payload of the RelatedComponents constructor of Cluster): a single Proximity; the "_rc_" field prefix is the one handed to unPrefix in the JSON derivation
250 data RCParams = RCParams
251 { _rc_proximity :: Proximity } deriving (Show, Eq)
253 -- | Parameters for Louvain clustering (payload of the Louvain constructor of Cluster): a single Proximity; the "_louvain_" field prefix is the one handed to unPrefix in the JSON derivation
254 data LouvainParams = LouvainParams
255 { _louvain_proximity :: Proximity } deriving (Show, Eq)
263 -- | Proximity constructors
264 data Proximity = WeightedLogJaccard WLJParams
265 | Hamming HammingParams
269 -- | Parameters for WeightedLogJaccard proximity (payload of the WeightedLogJaccard constructor of Proximity); the "_wlj_" field prefix is the one handed to unPrefix in the JSON derivation
270 data WLJParams = WLJParams
271 { _wlj_threshold :: Double
272 , _wlj_sensibility :: Double -- NOTE(review): "sensibility" presumably means sensitivity; the field name feeds the JSON derivation, so do not rename casually — confirm
273 } deriving (Show, Eq)
275 -- | Parameters for Hamming proximity (payload of the Hamming constructor of Proximity): a single Double threshold; the "_hamming_" field prefix is the one handed to unPrefix in the JSON derivation
276 data HammingParams = HammingParams
277 { _hamming_threshold :: Double } deriving (Show, Eq)
285 -- | Filter : the available filters; the only constructor is SmallBranch, parameterised by SBParams
286 data Filter = SmallBranch SBParams deriving (Show, Eq)
288 -- | Parameters for SmallBranch filter (payload of the SmallBranch constructor of Filter); NOTE(review): the exact meaning of periodsInf / periodsSup / minNodes is not stated in this file — confirm against the filter implementation
289 data SBParams = SBParams
290 { _sb_periodsInf :: Int
291 , _sb_periodsSup :: Int
292 , _sb_minNodes :: Int } deriving (Show, Eq)
300 -- | Metric : the available metrics; the only constructor is BranchAge
301 data Metric = BranchAge deriving (Show, Eq)
309 -- | Tagger : the available taggers (BranchLabelFreq, GroupLabelCooc, GroupDynamics); derives Show only
310 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
318 -- | Sort : the available sort criteria (only ByBranchAge); Order : the asc or desc direction of a sort. Both derive Show only
319 data Sort = ByBranchAge deriving (Show)
320 data Order = Asc | Desc deriving (Show)
328 -- | A PhyloQuery describes a phylomemic reconstruction; it is stored in PhyloParam (_phyloParam_query) and its "_q_" field prefix is the one handed to unPrefix in the JSON derivation
329 data PhyloQuery = PhyloQuery
330 { _q_phyloTitle :: Text
331 , _q_phyloDesc :: Text
333 -- Grain and Steps used for the PhyloPeriods (both plain Int)
334 , _q_periodGrain :: Int
335 , _q_periodSteps :: Int
337 -- Clustering method for building the contextual unit of the Phylo (ie: level 1), with its lists of Metrics and Filters
338 , _q_contextualUnit :: Cluster
339 , _q_contextualUnitMetrics :: [Metric]
340 , _q_contextualUnitFilters :: [Filter]
342 -- Inter-temporal matching method of the Phylo, given as a Proximity
343 , _q_interTemporalMatching :: Proximity
345 -- Last Level of the reconstruction
346 , _q_nthLevel :: Level
347 -- Clustering method used from level 1 up to nthLevel
348 , _q_nthCluster :: Cluster
349 } deriving (Show, Eq)
351 -- | Filiation : to choose the Phylo edge you want to export : --> <-- <--> <=> ; NOTE(review): the arrows presumably map, in order, to Ascendant, Descendant, Merge, Complete — confirm
352 data Filiation = Ascendant | Descendant | Merge | Complete deriving (Show)
353 data EdgeType = PeriodEdge | LevelEdge deriving (Show) -- kind of a PhyloEdge (type of _phylo_edgeType)
361 -- | A PhyloView is the output type of a Phylo
362 data PhyloView = PhyloView
363 { _phylo_viewParam :: PhyloParam
364 , _phylo_viewTitle :: Text
365 , _phylo_viewDescription :: Text
366 , _phylo_viewFiliation :: Filiation
367 , _phylo_viewMetrics :: Map Text [Double]
368 , _phylo_viewBranches :: [PhyloBranch]
369 , _phylo_viewNodes :: [PhyloNode]
370 , _phylo_viewEdges :: [PhyloEdge]
373 -- | A phyloview is made of PhyloBranches, edges and nodes
374 data PhyloBranch = PhyloBranch
375 { _phylo_branchId :: PhyloBranchId
376 , _phylo_branchLabel :: Text
377 , _phylo_branchMetrics :: Map Text [Double]
380 data PhyloEdge = PhyloEdge
381 { _phylo_edgeSource :: PhyloGroupId
382 , _phylo_edgeTarget :: PhyloGroupId
383 , _phylo_edgeType :: EdgeType
384 , _phylo_edgeWeight :: Weight
387 data PhyloNode = PhyloNode
388 { _phylo_nodeId :: PhyloGroupId
389 , _phylo_nodeBranchId :: Maybe PhyloBranchId
390 , _phylo_nodeLabel :: Text
391 , _phylo_nodeNgramsIdx :: [Int]
392 , _phylo_nodeNgrams :: Maybe [Ngrams]
393 , _phylo_nodeMetrics :: Map Text [Double]
394 , _phylo_nodeLevelParents :: Maybe [PhyloGroupId]
395 , _phylo_nodeLevelChilds :: [PhyloNode]
399 ------------------------
400 -- | PhyloQueryView | --
401 ------------------------
404 data DisplayMode = Flat | Nested -- display mode of a PhyloQueryView (type of _qv_display)
406 -- | A PhyloQueryView describes a Phylo as an output view
407 data PhyloQueryView = PhyloQueryView
410 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? (Complete is redundant; Merge uses the max)
411 , _qv_filiation :: Filiation
413 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
414 , _qv_levelChilds :: Bool
415 , _qv_levelChildsDepth :: Level
417 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
418 -- Firstly the metrics, then the filters and the taggers
419 , _qv_metrics :: [Metric]
420 , _qv_filters :: [Filter]
421 , _qv_taggers :: [Tagger]
423 -- An asc or desc sort to apply to the PhyloGraph
424 , _qv_sort :: Maybe (Sort,Order)
426 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
427 , _qv_display :: DisplayMode
428 , _qv_verbose :: Bool
437 makeLenses ''PhyloParam -- Template Haskell (Control.Lens.makeLenses): generates lenses for the underscore-prefixed record fields of each listed type
438 makeLenses ''Software
441 makeLenses ''PhyloPeaks
442 makeLenses ''PhyloGroup
443 makeLenses ''PhyloLevel
444 makeLenses ''PhyloPeriod
445 makeLenses ''PhyloFis
447 makeLenses ''Proximity -- NOTE(review): Proximity as declared in this file has no record fields, so this splice presumably generates nothing — confirm
451 makeLenses ''PhyloQuery
452 makeLenses ''PhyloQueryView
454 makeLenses ''PhyloView
455 makeLenses ''PhyloBranch
456 makeLenses ''PhyloNode
457 makeLenses ''PhyloEdge
460 ------------------------
461 -- | JSON instances | --
462 ------------------------
465 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo ) -- each unPrefix argument is the record-field prefix of the type it is paired with; presumably unPrefix strips it from the JSON keys — confirm in Gargantext.Core.Utils.Prefix
466 $(deriveJSON (unPrefix "_phylo_peaks" ) ''PhyloPeaks )
467 $(deriveJSON defaultOptions ''Tree ) -- types declared without record fields (Tree, Filter, Metric, Cluster, Proximity) use defaultOptions
468 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
469 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
470 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
471 $(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
473 $(deriveJSON (unPrefix "_software_" ) ''Software )
474 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
476 $(deriveJSON defaultOptions ''Filter )
477 $(deriveJSON defaultOptions ''Metric )
478 $(deriveJSON defaultOptions ''Cluster )
479 $(deriveJSON defaultOptions ''Proximity )
481 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
482 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
483 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
484 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
485 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
486 $(deriveJSON (unPrefix "_sb_" ) ''SBParams )
488 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
491 ----------------------------
492 -- | TODO XML instances | --
493 ----------------------------