]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
[FIX+DOC] haddock + warnings
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
26
27 module Gargantext.Viz.AdaptativePhylo where
28
29 import Data.Aeson
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
33 import Data.Map (Map)
34 import Data.Set (Set)
35
36 import Gargantext.Core.Utils.Prefix (unPrefix)
37 import Gargantext.Prelude
38 import Gargantext.Text.Context (TermList)
39
40 import GHC.Generics
41 import GHC.IO (FilePath)
42 import Control.DeepSeq (NFData)
43 import Control.Lens (makeLenses)
44
45 import qualified Data.Text.Lazy as TextLazy
46
47
48 ----------------
49 -- | Config | --
50 ----------------
51
52
53 data CorpusParser =
54 Wos {_wos_limit :: Int}
55 | Csv {_csv_limit :: Int}
56 deriving (Show,Generic,Eq)
57
58 data SeaElevation =
59 Constante
60 { _cons_start :: Double
61 , _cons_step :: Double }
62 | Adaptative
63 { _adap_granularity :: Double }
64 deriving (Show,Generic,Eq)
65
66 data Proximity =
67 WeightedLogJaccard
68 { _wlj_sensibility :: Double
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
73 }
74 | Hamming
75 deriving (Show,Generic,Eq)
76
77
78 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
79
80 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
81
82 data Synchrony =
83 ByProximityThreshold
84 { _bpt_threshold :: Double
85 , _bpt_sensibility :: Double
86 , _bpt_scope :: SynchronyScope
87 , _bpt_strategy :: SynchronyStrategy }
88 | ByProximityDistribution
89 { _bpd_sensibility :: Double
90 , _bpd_strategy :: SynchronyStrategy }
91 deriving (Show,Generic,Eq)
92
93
94 data TimeUnit =
95 Year
96 { _year_period :: Int
97 , _year_step :: Int
98 , _year_matchingFrame :: Int }
99 deriving (Show,Generic,Eq)
100
101
102 data Clique =
103 Fis
104 { _fis_support :: Int
105 , _fis_size :: Int }
106 | MaxClique
107 { _mcl_size :: Int }
108 deriving (Show,Generic,Eq)
109
110
111 data Quality =
112 Quality { _qua_granularity :: Double
113 , _qua_minBranch :: Int }
114 deriving (Show,Generic,Eq)
115
116
117 data Config =
118 Config { corpusPath :: FilePath
119 , listPath :: FilePath
120 , outputPath :: FilePath
121 , corpusParser :: CorpusParser
122 , phyloName :: Text
123 , phyloLevel :: Int
124 , phyloProximity :: Proximity
125 , seaElevation :: SeaElevation
126 , phyloSynchrony :: Synchrony
127 , phyloQuality :: Quality
128 , timeUnit :: TimeUnit
129 , clique :: Clique
130 , exportLabel :: [PhyloLabel]
131 , exportSort :: Sort
132 , exportFilter :: [Filter]
133 } deriving (Show,Generic,Eq)
134
135
136 defaultConfig :: Config
137 defaultConfig =
138 Config { corpusPath = ""
139 , listPath = ""
140 , outputPath = ""
141 , corpusParser = Csv 1000
142 , phyloName = pack "Default Phylo"
143 , phyloLevel = 2
144 , phyloProximity = WeightedLogJaccard 10
145 , seaElevation = Constante 0 0.1
146 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
147 , phyloQuality = Quality 0.6 1
148 , timeUnit = Year 3 1 5
149 , clique = Fis 1 5
150 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
151 , exportSort = ByHierarchy
152 , exportFilter = [ByBranchSize 2]
153 }
154
155 instance FromJSON Config
156 instance ToJSON Config
157 instance FromJSON CorpusParser
158 instance ToJSON CorpusParser
159 instance FromJSON Proximity
160 instance ToJSON Proximity
161 instance FromJSON SeaElevation
162 instance ToJSON SeaElevation
163 instance FromJSON TimeUnit
164 instance ToJSON TimeUnit
165 instance FromJSON Clique
166 instance ToJSON Clique
167 instance FromJSON PhyloLabel
168 instance ToJSON PhyloLabel
169 instance FromJSON Tagger
170 instance ToJSON Tagger
171 instance FromJSON Sort
172 instance ToJSON Sort
173 instance FromJSON Order
174 instance ToJSON Order
175 instance FromJSON Filter
176 instance ToJSON Filter
177 instance FromJSON SynchronyScope
178 instance ToJSON SynchronyScope
179 instance FromJSON SynchronyStrategy
180 instance ToJSON SynchronyStrategy
181 instance FromJSON Synchrony
182 instance ToJSON Synchrony
183 instance FromJSON Quality
184 instance ToJSON Quality
185
186
187 -- | Software parameters
188 data Software =
189 Software { _software_name :: Text
190 , _software_version :: Text
191 } deriving (Generic, Show, Eq)
192
193 defaultSoftware :: Software
194 defaultSoftware =
195 Software { _software_name = pack "Gargantext"
196 , _software_version = pack "v4" }
197
198
199 -- | Global parameters of a Phylo
200 data PhyloParam =
201 PhyloParam { _phyloParam_version :: Text
202 , _phyloParam_software :: Software
203 , _phyloParam_config :: Config
204 } deriving (Generic, Show, Eq)
205
206 defaultPhyloParam :: PhyloParam
207 defaultPhyloParam =
208 PhyloParam { _phyloParam_version = pack "v2.adaptative"
209 , _phyloParam_software = defaultSoftware
210 , _phyloParam_config = defaultConfig }
211
212
213 ------------------
214 -- | Document | --
215 ------------------
216
217
218 -- | Date : a simple Integer
219 type Date = Int
220
221 -- | Ngrams : a contiguous sequence of n terms
222 type Ngrams = Text
223
224 -- | Document : a piece of Text linked to a Date
225 data Document = Document
226 { date :: Date
227 , text :: [Ngrams]
228 } deriving (Eq,Show,Generic,NFData)
229
230
231 --------------------
232 -- | Foundation | --
233 --------------------
234
235
236 -- | The Foundations of a Phylo created from a given TermList
237 data PhyloFoundations = PhyloFoundations
238 { _foundations_roots :: !(Vector Ngrams)
239 , _foundations_mapList :: TermList
240 } deriving (Generic, Show, Eq)
241
242
243 ---------------------------
244 -- | Coocurency Matrix | --
245 ---------------------------
246
247
248 -- | Cooc : a coocurency matrix between two ngrams
249 type Cooc = Map (Int,Int) Double
250
251
252 -------------------
253 -- | Phylomemy | --
254 -------------------
255
256
257 -- | Phylo datatype of a phylomemy
258 -- foundations : the foundations of the phylo
259 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
260 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
261 -- param : the parameters of the phylomemy (with the user's configuration)
262 -- periods : the temporal steps of a phylomemy
263 data Phylo =
264 Phylo { _phylo_foundations :: PhyloFoundations
265 , _phylo_timeCooc :: !(Map Date Cooc)
266 , _phylo_timeDocs :: !(Map Date Double)
267 , _phylo_termFreq :: !(Map Int Double)
268 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
269 , _phylo_param :: PhyloParam
270 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
271 }
272 deriving (Generic, Show, Eq)
273
274
275 -- | PhyloPeriodId : the id of a given period
276 type PhyloPeriodId = (Date,Date)
277
278 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
279 -- id: tuple (start date, end date) of the temporal step of the phylomemy
280 -- levels: levels of granularity
281 data PhyloPeriod =
282 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
283 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
284 } deriving (Generic, Show, Eq)
285
286
287 -- | Level : a level of clustering
288 type Level = Int
289
290 -- | PhyloLevelId : the id of a level of clustering in a given period
291 type PhyloLevelId = (PhyloPeriodId,Level)
292
293 -- | PhyloLevel : levels of phylomemy on a synchronic axis
294 -- Levels description:
295 -- Level 0: The foundations and the base of the phylo
296 -- Level 1: First level of clustering (the Fis)
297 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
298 data PhyloLevel =
299 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
300 , _phylo_levelLevel :: Level
301 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
302 }
303 deriving (Generic, Show, Eq)
304
305
306 type PhyloGroupId = (PhyloLevelId, Int)
307
308 -- | BranchId : (a level, a sequence of branch index)
309 -- the sequence is a path of heritage from the most to the less specific branch
310 type PhyloBranchId = (Level, [Int])
311
312 -- | PhyloGroup : group of ngrams at each level and period
313 data PhyloGroup =
314 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
315 , _phylo_groupLevel :: Level
316 , _phylo_groupIndex :: Int
317 , _phylo_groupLabel :: Text
318 , _phylo_groupSupport :: Support
319 , _phylo_groupNgrams :: [Int]
320 , _phylo_groupCooc :: !(Cooc)
321 , _phylo_groupBranchId :: PhyloBranchId
322 , _phylo_groupMeta :: Map Text [Double]
323 , _phylo_groupLevelParents :: [Pointer]
324 , _phylo_groupLevelChilds :: [Pointer]
325 , _phylo_groupPeriodParents :: [Pointer]
326 , _phylo_groupPeriodChilds :: [Pointer]
327 }
328 deriving (Generic, Show, Eq, NFData)
329
330 -- | Weight : A generic mesure that can be associated with an Id
331 type Weight = Double
332
333 -- | Pointer : A weighted pointer to a given PhyloGroup
334 type Pointer = (PhyloGroupId, Weight)
335
336 data Filiation = ToParents | ToChilds deriving (Generic, Show)
337 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
338
339
340 ----------------------
341 -- | Phylo Clique | --
342 ----------------------
343
344 -- | Support : Number of Documents where a Clique occurs
345 type Support = Int
346
347 data PhyloClique = PhyloClique
348 { _phyloClique_nodes :: Set Ngrams
349 , _phyloClique_support :: Support
350 , _phyloClique_period :: (Date,Date)
351 } deriving (Generic,NFData,Show,Eq)
352
353
354 ----------------
355 -- | Export | --
356 ----------------
357
358 type DotId = TextLazy.Text
359
360 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
361
362 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
363
364 data Order = Asc | Desc deriving (Show,Generic,Eq)
365
366 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
367
368 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
369
370 data PhyloLabel =
371 BranchLabel
372 { _branch_labelTagger :: Tagger
373 , _branch_labelSize :: Int }
374 | GroupLabel
375 { _group_labelTagger :: Tagger
376 , _group_labelSize :: Int }
377 deriving (Show,Generic,Eq)
378
379 data PhyloBranch =
380 PhyloBranch
381 { _branch_id :: PhyloBranchId
382 , _branch_canonId :: [Int]
383 , _branch_seaLevel :: [Double]
384 , _branch_x :: Double
385 , _branch_y :: Double
386 , _branch_w :: Double
387 , _branch_t :: Double
388 , _branch_label :: Text
389 , _branch_meta :: Map Text [Double]
390 } deriving (Generic, Show, Eq)
391
392 data PhyloExport =
393 PhyloExport
394 { _export_groups :: [PhyloGroup]
395 , _export_branches :: [PhyloBranch]
396 } deriving (Generic, Show)
397
398 ----------------
399 -- | Lenses | --
400 ----------------
401
402 makeLenses ''Config
403 makeLenses ''Proximity
404 makeLenses ''SeaElevation
405 makeLenses ''Quality
406 makeLenses ''Clique
407 makeLenses ''PhyloLabel
408 makeLenses ''TimeUnit
409 makeLenses ''PhyloFoundations
410 makeLenses ''PhyloClique
411 makeLenses ''Phylo
412 makeLenses ''PhyloPeriod
413 makeLenses ''PhyloLevel
414 makeLenses ''PhyloGroup
415 makeLenses ''PhyloParam
416 makeLenses ''PhyloExport
417 makeLenses ''PhyloBranch
418
419 ------------------------
420 -- | JSON instances | --
421 ------------------------
422
423
424 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)