2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
45 ------------------------------------------------------------------------
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
49 } deriving (Generic, Show)
51 -- | .phylo parameters
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
56 , _phyloParam_query :: Maybe PhyloQuery
57 } deriving (Generic, Show)
62 -- TODO move somewhere since it is generic
64 Software { _software_name :: Text
65 , _software_version :: Text
66 } deriving (Generic, Show)
68 ------------------------------------------------------------------------
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: Vector Ngrams
77 , _phylo_periods :: [PhyloPeriod]
79 deriving (Generic, Show)
82 -- | Date : a simple Integer
85 -- | UTCTime in seconds since UNIX epoch
86 -- type Start = POSIXTime
87 -- type End = POSIXTime
91 -- | PhyloStep : steps of phylomemy on temporal axis
92 -- Period: tuple (start date, end date) of the step of the phylomemy
93 -- Levels: levels of granularity
95 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
96 , _phylo_periodLevels :: [PhyloLevel]
98 deriving (Generic, Show)
101 -- | PhyloLevel : levels of phylomemy on level axis
102 -- Levels description:
103 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
104 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
105 -- Level 1: First level of clustering
106 -- Level N: Nth level of clustering
108 PhyloLevel { _phylo_levelId :: PhyloLevelId
109 , _phylo_levelGroups :: [PhyloGroup]
111 deriving (Generic, Show)
114 -- | PhyloGroup : group of ngrams at each level and step
115 -- Label : maybe has a label as text
116 -- Ngrams: set of terms that build the group
117 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
118 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
119 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
120 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
122 PhyloGroup { _phylo_groupId :: PhyloGroupId
123 , _phylo_groupLabel :: Text
124 , _phylo_groupNgrams :: [Int]
125 , _phylo_groupMeta :: Map Text Double
126 , _phylo_groupCooc :: Map (Int, Int) Double
127 , _phylo_groupBranchId :: Maybe PhyloBranchId
129 , _phylo_groupPeriodParents :: [Pointer]
130 , _phylo_groupPeriodChilds :: [Pointer]
132 , _phylo_groupLevelParents :: [Pointer]
133 , _phylo_groupLevelChilds :: [Pointer]
135 deriving (Generic, Show, Eq, Ord)
138 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
140 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
144 type PhyloPeriodId = (Start, End)
145 type PhyloLevelId = (PhyloPeriodId, Level)
146 type PhyloGroupId = (PhyloLevelId, Index)
147 type PhyloBranchId = (Level, Index)
150 -- | Weight : A generic mesure that can be associated with an Id
152 -- | Pointer : A weighted linked with a given PhyloGroup
153 type Pointer = (PhyloGroupId, Weight)
154 -- | Ngrams : a contiguous sequence of n terms
161 -- | Document : a piece of Text linked to a Date
162 data Document = Document
168 -- | Clique : Set of ngrams cooccurring in the same Document
169 type Clique = Set Ngrams
170 -- | Support : Number of Documents where a Clique occurs
172 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
173 type PhyloFis = (Clique,Support)
176 -- | A list of clustered PhyloGroup
177 type PhyloCluster = [PhyloGroup]
180 -- | A List of PhyloGroup in a Graph
181 type GroupNodes = [PhyloGroup]
182 -- | A List of weighted links between some PhyloGroups in a Graph
183 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
184 -- | The association as a Graph between a list of Nodes and a list of Edges
185 type GroupGraph = (GroupNodes,GroupEdges)
192 data PhyloError = LevelDoesNotExist
200 -- | Cluster constructors
201 data Cluster = Fis FisParams
202 | RelatedComponents RCParams
203 | Louvain LouvainParams
206 -- | Parameters for Fis clustering
207 data FisParams = FisParams
208 { _fis_filtered :: Bool
209 , _fis_keepMinorFis :: Bool
210 , _fis_minSupport :: Support
213 -- | Parameters for RelatedComponents clustering
214 data RCParams = RCParams
215 { _rc_proximity :: Proximity } deriving (Show)
217 -- | Parameters for Louvain clustering
218 data LouvainParams = LouvainParams
219 { _louvain_proximity :: Proximity } deriving (Show)
225 -- | Proximity constructors
226 data Proximity = WeightedLogJaccard WLJParams
227 | Hamming HammingParams
231 -- | Parameters for WeightedLogJaccard proximity
232 data WLJParams = WLJParams
233 { _wlj_threshold :: Double
234 , _wlj_sensibility :: Double
237 -- | Parameters for Hamming proximity
238 data HammingParams = HammingParams
239 { _hamming_threshold :: Double } deriving (Show)
245 -- | Filter constructors
246 data Filter = LonelyBranch LBParams deriving (Show)
248 -- | Parameters for LonelyBranch filter
249 data LBParams = LBParams
250 { _lb_periodsInf :: Int
251 , _lb_periodsSup :: Int
252 , _lb_minNodes :: Int } deriving (Show)
258 -- | Metric constructors
259 data Metric = BranchAge deriving (Show)
265 -- | Tagger constructors
266 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
272 -- | Sort constructors
273 data Sort = ByBranchAge deriving (Show)
274 data Order = Asc | Desc deriving (Show)
280 -- | A Phyloquery describes a phylomemic reconstruction
281 data PhyloQuery = PhyloQuery
282 { _q_phyloName :: Text
283 , _q_phyloDesc :: Text
285 -- Grain and Steps for the PhyloPeriods
286 , _q_periodGrain :: Int
287 , _q_periodSteps :: Int
289 -- Clustering method for making level 1 of the Phylo
290 , _q_cluster :: Cluster
292 -- Inter-temporal matching method of the Phylo
293 , _q_interTemporalMatching :: Proximity
295 -- Last level of reconstruction
296 , _q_nthLevel :: Level
297 -- Clustering method used from level 1 to nthLevel
298 , _q_nthCluster :: Cluster
301 data Filiation = Ascendant | Descendant | Complete deriving (Show)
302 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
308 -- | A PhyloView is the output type of a Phylo
309 data PhyloView = PhyloView
310 { _phylo_viewParam :: PhyloParam
311 , _phylo_viewLabel :: Text
312 , _phylo_viewDescription :: Text
313 , _phylo_viewFiliation :: Filiation
314 , _phylo_viewMeta :: Map Text Double
315 , _phylo_viewBranches :: [PhyloBranch]
316 , _phylo_viewNodes :: [PhyloNode]
317 , _phylo_viewEdges :: [PhyloEdge]
320 -- | A phyloview is made of PhyloBranches, edges and nodes
321 data PhyloBranch = PhyloBranch
322 { _phylo_branchId :: PhyloBranchId
323 , _phylo_branchLabel :: Text
324 , _phylo_branchMeta :: Map Text Double
327 data PhyloEdge = PhyloEdge
328 { _phylo_edgeSource :: PhyloGroupId
329 , _phylo_edgeTarget :: PhyloGroupId
330 , _phylo_edgeType :: EdgeType
331 , _phylo_edgeWeight :: Weight
334 data PhyloNode = PhyloNode
335 { _phylo_nodeId :: PhyloGroupId
336 , _phylo_nodeBranchId :: Maybe PhyloBranchId
337 , _phylo_nodeLabel :: Text
338 , _phylo_nodeNgramsIdx :: [Int]
339 , _phylo_nodeNgrams :: Maybe [Ngrams]
340 , _phylo_nodeMeta :: Map Text Double
341 , _phylo_nodeParent :: Maybe PhyloGroupId
342 , _phylo_nodeChilds :: [PhyloNode]
345 ------------------------
346 -- | PhyloQueryView | --
347 ------------------------
349 data DisplayMode = Flat | Nested
351 -- | A PhyloQueryView describes a Phylo as an output view
352 data PhyloQueryView = PhyloQueryView
355 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
356 , _qv_filiation :: Filiation
358 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
360 , _qv_childsDepth :: Level
362 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
363 -- Firstly the metrics, then the filters and the taggers
364 , _qv_metrics :: [Metric]
365 , _qv_filters :: [Filter]
366 , _qv_taggers :: [Tagger]
368 -- An asc or desc sort to apply to the PhyloGraph
369 , _qv_sort :: Maybe (Sort,Order)
371 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
372 , _qv_display :: DisplayMode
373 , _qv_verbose :: Bool
380 makeLenses ''PhyloParam
381 makeLenses ''PhyloExport
382 makeLenses ''Software
385 makeLenses ''PhyloGroup
386 makeLenses ''PhyloLevel
387 makeLenses ''PhyloPeriod
389 makeLenses ''Proximity
393 makeLenses ''PhyloQuery
394 makeLenses ''PhyloQueryView
396 makeLenses ''PhyloView
397 makeLenses ''PhyloBranch
398 makeLenses ''PhyloNode
399 makeLenses ''PhyloEdge
401 ------------------------
402 -- | JSON instances | --
403 ------------------------
405 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
406 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
407 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
408 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
410 $(deriveJSON (unPrefix "_software_" ) ''Software )
411 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
412 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
414 $(deriveJSON defaultOptions ''Cluster )
415 $(deriveJSON defaultOptions ''Proximity )
417 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
418 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
419 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
420 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
421 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
423 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
425 ----------------------------
426 -- | TODO XML instances | --
427 ----------------------------