]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Add new types for Cluster, Proximity, Filter, etc
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON,defaultOptions)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 ------------------------------------------------------------------------
46 data PhyloExport =
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
49 } deriving (Generic, Show)
50
51 -- | .phylo parameters
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
56 , _phyloParam_query :: Maybe PhyloQuery
57 } deriving (Generic, Show)
58
59 type Hash = Text
60
61 -- | Software
62 -- TODO move somewhere since it is generic
63 data Software =
64 Software { _software_name :: Text
65 , _software_version :: Text
66 } deriving (Generic, Show)
67
68 ------------------------------------------------------------------------
69
70 -- | Phylo datatype of a phylomemy
71 -- Duration : time Segment of the whole Phylo
72 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
73 -- Periods : list of all the periods of a Phylo
74 data Phylo =
75 Phylo { _phylo_duration :: (Start, End)
76 , _phylo_foundations :: Vector Ngrams
77 , _phylo_periods :: [PhyloPeriod]
78 }
79 deriving (Generic, Show)
80
81
82 -- | Date : a simple Integer
83 type Date = Int
84
85 -- | UTCTime in seconds since UNIX epoch
86 -- type Start = POSIXTime
87 -- type End = POSIXTime
88 type Start = Date
89 type End = Date
90
91 -- | PhyloStep : steps of phylomemy on temporal axis
92 -- Period: tuple (start date, end date) of the step of the phylomemy
93 -- Levels: levels of granularity
94 data PhyloPeriod =
95 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
96 , _phylo_periodLevels :: [PhyloLevel]
97 }
98 deriving (Generic, Show)
99
100
101 -- | PhyloLevel : levels of phylomemy on level axis
102 -- Levels description:
103 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
104 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
105 -- Level 1: First level of clustering
106 -- Level N: Nth level of clustering
107 data PhyloLevel =
108 PhyloLevel { _phylo_levelId :: PhyloLevelId
109 , _phylo_levelGroups :: [PhyloGroup]
110 }
111 deriving (Generic, Show)
112
113
114 -- | PhyloGroup : group of ngrams at each level and step
115 -- Label : maybe has a label as text
116 -- Ngrams: set of terms that build the group
117 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
118 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
119 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
120 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
121 data PhyloGroup =
122 PhyloGroup { _phylo_groupId :: PhyloGroupId
123 , _phylo_groupLabel :: Text
124 , _phylo_groupNgrams :: [Int]
125 , _phylo_groupMeta :: Map Text Double
126 , _phylo_groupCooc :: Map (Int, Int) Double
127 , _phylo_groupBranchId :: Maybe PhyloBranchId
128
129 , _phylo_groupPeriodParents :: [Pointer]
130 , _phylo_groupPeriodChilds :: [Pointer]
131
132 , _phylo_groupLevelParents :: [Pointer]
133 , _phylo_groupLevelChilds :: [Pointer]
134 }
135 deriving (Generic, Show, Eq, Ord)
136
137
138 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
139 type Level = Int
140 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
141 type Index = Int
142
143
144 type PhyloPeriodId = (Start, End)
145 type PhyloLevelId = (PhyloPeriodId, Level)
146 type PhyloGroupId = (PhyloLevelId, Index)
147 type PhyloBranchId = (Level, Index)
148
149
150 -- | Weight : A generic mesure that can be associated with an Id
151 type Weight = Double
152 -- | Pointer : A weighted linked with a given PhyloGroup
153 type Pointer = (PhyloGroupId, Weight)
154 -- | Ngrams : a contiguous sequence of n terms
155 type Ngrams = Text
156
157
158 -- | Aggregates | --
159
160
161 -- | Document : a piece of Text linked to a Date
162 data Document = Document
163 { date :: Date
164 , text :: Text
165 } deriving (Show)
166
167
168 -- | Clique : Set of ngrams cooccurring in the same Document
169 type Clique = Set Ngrams
170 -- | Support : Number of Documents where a Clique occurs
171 type Support = Int
172 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
173 type PhyloFis = (Clique,Support)
174
175
176 -- | A list of clustered PhyloGroup
177 type PhyloCluster = [PhyloGroup]
178
179
180 -- | A List of PhyloGroup in a Graph
181 type GroupNodes = [PhyloGroup]
182 -- | A List of weighted links between some PhyloGroups in a Graph
183 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
184 -- | The association as a Graph between a list of Nodes and a list of Edges
185 type GroupGraph = (GroupNodes,GroupEdges)
186
187
188 ---------------
189 -- | Error | --
190 ---------------
191
192 data PhyloError = LevelDoesNotExist
193 | LevelUnassigned
194 deriving (Show)
195
196 -----------------
197 -- | Cluster | --
198 -----------------
199
200 -- | Cluster constructors
201 data Cluster = Fis FisParams
202 | RelatedComponents RCParams
203 | Louvain LouvainParams
204 deriving (Show)
205
206 -- | Parameters for Fis clustering
207 data FisParams = FisParams
208 { _fis_filtered :: Bool
209 , _fis_keepMinorFis :: Bool
210 , _fis_minSupport :: Support
211 } deriving (Show)
212
213 -- | Parameters for RelatedComponents clustering
214 data RCParams = RCParams
215 { _rc_proximity :: Proximity } deriving (Show)
216
217 -- | Parameters for Louvain clustering
218 data LouvainParams = LouvainParams
219 { _louvain_proximity :: Proximity } deriving (Show)
220
221 -------------------
222 -- | Proximity | --
223 -------------------
224
225 -- | Proximity constructors
226 data Proximity = WeightedLogJaccard WLJParams
227 | Hamming HammingParams
228 | Filiation
229 deriving (Show)
230
231 -- | Parameters for WeightedLogJaccard proximity
232 data WLJParams = WLJParams
233 { _wlj_threshold :: Double
234 , _wlj_sensibility :: Double
235 } deriving (Show)
236
237 -- | Parameters for Hamming proximity
238 data HammingParams = HammingParams
239 { _hamming_threshold :: Double } deriving (Show)
240
241 ----------------
242 -- | Filter | --
243 ----------------
244
245 -- | Filter constructors
246 data Filter = LonelyBranch LBParams deriving (Show)
247
248 -- | Parameters for LonelyBranch filter
249 data LBParams = LBParams
250 { _lb_periodsInf :: Int
251 , _lb_periodsSup :: Int
252 , _lb_minNodes :: Int } deriving (Show)
253
254 ----------------
255 -- | Metric | --
256 ----------------
257
258 -- | Metric constructors
259 data Metric = BranchAge deriving (Show)
260
261 ----------------
262 -- | Tagger | --
263 ----------------
264
265 -- | Tagger constructors
266 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
267
268 --------------
269 -- | Sort | --
270 --------------
271
272 -- | Sort constructors
273 data Sort = ByBranchAge deriving (Show)
274 data Order = Asc | Desc deriving (Show)
275
276 --------------------
277 -- | PhyloQuery | --
278 --------------------
279
280 -- | A Phyloquery describes a phylomemic reconstruction
281 data PhyloQuery = PhyloQuery
282 { _q_phyloName :: Text
283 , _q_phyloDesc :: Text
284
285 -- Grain and Steps for the PhyloPeriods
286 , _q_periodGrain :: Int
287 , _q_periodSteps :: Int
288
289 -- Clustering method for making level 1 of the Phylo
290 , _q_cluster :: Cluster
291
292 -- Inter-temporal matching method of the Phylo
293 , _q_interTemporalMatching :: Proximity
294
295 -- Last level of reconstruction
296 , _q_nthLevel :: Level
297 -- Clustering method used from level 1 to nthLevel
298 , _q_nthCluster :: Cluster
299 } deriving (Show)
300
301 data Filiation = Ascendant | Descendant | Complete deriving (Show)
302 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
303
304 -------------------
305 -- | PhyloView | --
306 -------------------
307
308 -- | A PhyloView is the output type of a Phylo
309 data PhyloView = PhyloView
310 { _phylo_viewParam :: PhyloParam
311 , _phylo_viewLabel :: Text
312 , _phylo_viewDescription :: Text
313 , _phylo_viewFiliation :: Filiation
314 , _phylo_viewMeta :: Map Text Double
315 , _phylo_viewBranches :: [PhyloBranch]
316 , _phylo_viewNodes :: [PhyloNode]
317 , _phylo_viewEdges :: [PhyloEdge]
318 } deriving (Show)
319
320 -- | A phyloview is made of PhyloBranches, edges and nodes
321 data PhyloBranch = PhyloBranch
322 { _phylo_branchId :: PhyloBranchId
323 , _phylo_branchLabel :: Text
324 , _phylo_branchMeta :: Map Text Double
325 } deriving (Show)
326
327 data PhyloEdge = PhyloEdge
328 { _phylo_edgeSource :: PhyloGroupId
329 , _phylo_edgeTarget :: PhyloGroupId
330 , _phylo_edgeType :: EdgeType
331 , _phylo_edgeWeight :: Weight
332 } deriving (Show)
333
334 data PhyloNode = PhyloNode
335 { _phylo_nodeId :: PhyloGroupId
336 , _phylo_nodeBranchId :: Maybe PhyloBranchId
337 , _phylo_nodeLabel :: Text
338 , _phylo_nodeNgramsIdx :: [Int]
339 , _phylo_nodeNgrams :: Maybe [Ngrams]
340 , _phylo_nodeMeta :: Map Text Double
341 , _phylo_nodeParent :: Maybe PhyloGroupId
342 , _phylo_nodeChilds :: [PhyloNode]
343 } deriving (Show)
344
345 ------------------------
346 -- | PhyloQueryView | --
347 ------------------------
348
349 data DisplayMode = Flat | Nested
350
351 -- | A PhyloQueryView describes a Phylo as an output view
352 data PhyloQueryView = PhyloQueryView
353 { _qv_lvl :: Level
354
355 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
356 , _qv_filiation :: Filiation
357
358 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
359 , _qv_childs :: Bool
360 , _qv_childsDepth :: Level
361
362 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
363 -- Firstly the metrics, then the filters and the taggers
364 , _qv_metrics :: [Metric]
365 , _qv_filters :: [Filter]
366 , _qv_taggers :: [Tagger]
367
368 -- An asc or desc sort to apply to the PhyloGraph
369 , _qv_sort :: Maybe (Sort,Order)
370
371 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
372 , _qv_display :: DisplayMode
373 , _qv_verbose :: Bool
374 }
375
376 ----------------
377 -- | Lenses | --
378 ----------------
379
380 makeLenses ''PhyloParam
381 makeLenses ''PhyloExport
382 makeLenses ''Software
383 --
384 makeLenses ''Phylo
385 makeLenses ''PhyloGroup
386 makeLenses ''PhyloLevel
387 makeLenses ''PhyloPeriod
388 --
389 makeLenses ''Proximity
390 makeLenses ''Cluster
391 makeLenses ''Filter
392 --
393 makeLenses ''PhyloQuery
394 makeLenses ''PhyloQueryView
395 --
396 makeLenses ''PhyloView
397 makeLenses ''PhyloBranch
398 makeLenses ''PhyloNode
399 makeLenses ''PhyloEdge
400
401 ------------------------
402 -- | JSON instances | --
403 ------------------------
404
405 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
406 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
407 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
408 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
409 --
410 $(deriveJSON (unPrefix "_software_" ) ''Software )
411 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
412 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
413 --
414 $(deriveJSON defaultOptions ''Cluster )
415 $(deriveJSON defaultOptions ''Proximity )
416 --
417 $(deriveJSON (unPrefix "_fis_" ) ''FisParams )
418 $(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
419 $(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
420 $(deriveJSON (unPrefix "_rc_" ) ''RCParams )
421 $(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
422 --
423 $(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
424
425 ----------------------------
426 -- | TODO XML instances | --
427 ----------------------------
428