]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/AdaptativePhylo.hs
change beta to lambda [0...1]
[gargantext.git] / src / Gargantext / Core / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
26
27 module Gargantext.Core.Viz.AdaptativePhylo where
28
29 import Data.Aeson
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
33 import Data.Map (Map)
34
35 import Gargantext.Core.Utils.Prefix (unPrefix)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Context (TermList)
38
39 import GHC.Generics
40 import GHC.IO (FilePath)
41 import Control.DeepSeq (NFData)
42 import Control.Lens (makeLenses)
43
44 import qualified Data.Text.Lazy as TextLazy
45
46
47 ----------------
48 -- | Config | --
49 ----------------
50
51
52 data CorpusParser =
53 Wos {_wos_limit :: Int}
54 | Csv {_csv_limit :: Int}
55 deriving (Show,Generic,Eq)
56
57 data SeaElevation =
58 Constante
59 { _cons_start :: Double
60 , _cons_step :: Double }
61 | Adaptative
62 { _adap_granularity :: Double }
63 deriving (Show,Generic,Eq)
64
65 data Proximity =
66 WeightedLogJaccard
67 { _wlj_sensibility :: Double
68 {-
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
73 -}
74 }
75 | Hamming
76 deriving (Show,Generic,Eq)
77
78
79 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
80
81 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
82
83 data Synchrony =
84 ByProximityThreshold
85 { _bpt_threshold :: Double
86 , _bpt_sensibility :: Double
87 , _bpt_scope :: SynchronyScope
88 , _bpt_strategy :: SynchronyStrategy }
89 | ByProximityDistribution
90 { _bpd_sensibility :: Double
91 , _bpd_strategy :: SynchronyStrategy }
92 deriving (Show,Generic,Eq)
93
94
95 data TimeUnit =
96 Year
97 { _year_period :: Int
98 , _year_step :: Int
99 , _year_matchingFrame :: Int }
100 deriving (Show,Generic,Eq)
101
102
103 data Clique =
104 Fis
105 { _fis_support :: Int
106 , _fis_size :: Int }
107 | MaxClique
108 { _mcl_size :: Int }
109 deriving (Show,Generic,Eq)
110
111
112 data Quality =
113 Quality { _qua_granularity :: Double
114 , _qua_minBranch :: Int }
115 deriving (Show,Generic,Eq)
116
117
118 data Config =
119 Config { corpusPath :: FilePath
120 , listPath :: FilePath
121 , outputPath :: FilePath
122 , corpusParser :: CorpusParser
123 , phyloName :: Text
124 , phyloLevel :: Int
125 , phyloProximity :: Proximity
126 , seaElevation :: SeaElevation
127 , findAncestors :: Bool
128 , phyloSynchrony :: Synchrony
129 , phyloQuality :: Quality
130 , timeUnit :: TimeUnit
131 , clique :: Clique
132 , exportLabel :: [PhyloLabel]
133 , exportSort :: Sort
134 , exportFilter :: [Filter]
135 } deriving (Show,Generic,Eq)
136
137
138 defaultConfig :: Config
139 defaultConfig =
140 Config { corpusPath = ""
141 , listPath = ""
142 , outputPath = ""
143 , corpusParser = Csv 1000
144 , phyloName = pack "Default Phylo"
145 , phyloLevel = 2
146 , phyloProximity = WeightedLogJaccard 10
147 , seaElevation = Constante 0.1 0.1
148 , findAncestors = True
149 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
150 , phyloQuality = Quality 0 1
151 , timeUnit = Year 3 1 5
152 , clique = MaxClique 0
153 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
154 , exportSort = ByHierarchy
155 , exportFilter = [ByBranchSize 2]
156 }
157
158 instance FromJSON Config
159 instance ToJSON Config
160 instance FromJSON CorpusParser
161 instance ToJSON CorpusParser
162 instance FromJSON Proximity
163 instance ToJSON Proximity
164 instance FromJSON SeaElevation
165 instance ToJSON SeaElevation
166 instance FromJSON TimeUnit
167 instance ToJSON TimeUnit
168 instance FromJSON Clique
169 instance ToJSON Clique
170 instance FromJSON PhyloLabel
171 instance ToJSON PhyloLabel
172 instance FromJSON Tagger
173 instance ToJSON Tagger
174 instance FromJSON Sort
175 instance ToJSON Sort
176 instance FromJSON Order
177 instance ToJSON Order
178 instance FromJSON Filter
179 instance ToJSON Filter
180 instance FromJSON SynchronyScope
181 instance ToJSON SynchronyScope
182 instance FromJSON SynchronyStrategy
183 instance ToJSON SynchronyStrategy
184 instance FromJSON Synchrony
185 instance ToJSON Synchrony
186 instance FromJSON Quality
187 instance ToJSON Quality
188
189
190 -- | Software parameters
191 data Software =
192 Software { _software_name :: Text
193 , _software_version :: Text
194 } deriving (Generic, Show, Eq)
195
196 defaultSoftware :: Software
197 defaultSoftware =
198 Software { _software_name = pack "Gargantext"
199 , _software_version = pack "v4" }
200
201
202 -- | Global parameters of a Phylo
203 data PhyloParam =
204 PhyloParam { _phyloParam_version :: Text
205 , _phyloParam_software :: Software
206 , _phyloParam_config :: Config
207 } deriving (Generic, Show, Eq)
208
209 defaultPhyloParam :: PhyloParam
210 defaultPhyloParam =
211 PhyloParam { _phyloParam_version = pack "v2.adaptative"
212 , _phyloParam_software = defaultSoftware
213 , _phyloParam_config = defaultConfig }
214
215
216 ------------------
217 -- | Document | --
218 ------------------
219
220
221 -- | Date : a simple Integer
222 type Date = Int
223
224 -- | Ngrams : a contiguous sequence of n terms
225 type Ngrams = Text
226
227 -- | Document : a piece of Text linked to a Date
228 data Document = Document
229 { date :: Date
230 , text :: [Ngrams]
231 } deriving (Eq,Show,Generic,NFData)
232
233
234 --------------------
235 -- | Foundation | --
236 --------------------
237
238
239 -- | The Foundations of a Phylo created from a given TermList
240 data PhyloFoundations = PhyloFoundations
241 { _foundations_roots :: !(Vector Ngrams)
242 , _foundations_mapList :: TermList
243 } deriving (Generic, Show, Eq)
244
245
246 ---------------------------
247 -- | Coocurency Matrix | --
248 ---------------------------
249
250
251 -- | Cooc : a coocurency matrix between two ngrams
252 type Cooc = Map (Int,Int) Double
253
254
255 -------------------
256 -- | Phylomemy | --
257 -------------------
258
259
260 -- | Phylo datatype of a phylomemy
261 -- foundations : the foundations of the phylo
262 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
263 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
264 -- param : the parameters of the phylomemy (with the user's configuration)
265 -- periods : the temporal steps of a phylomemy
266 data Phylo =
267 Phylo { _phylo_foundations :: PhyloFoundations
268 , _phylo_timeCooc :: !(Map Date Cooc)
269 , _phylo_timeDocs :: !(Map Date Double)
270 , _phylo_termFreq :: !(Map Int Double)
271 , _phylo_lastTermFreq :: !(Map Int Double)
272 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
273 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
274 , _phylo_param :: PhyloParam
275 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
276 }
277 deriving (Generic, Show, Eq)
278
279
280 -- | PhyloPeriodId : the id of a given period
281 type PhyloPeriodId = (Date,Date)
282
283 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
284 -- id: tuple (start date, end date) of the temporal step of the phylomemy
285 -- levels: levels of granularity
286 data PhyloPeriod =
287 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
288 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
289 } deriving (Generic, Show, Eq)
290
291
292 -- | Level : a level of clustering
293 type Level = Int
294
295 -- | PhyloLevelId : the id of a level of clustering in a given period
296 type PhyloLevelId = (PhyloPeriodId,Level)
297
298 -- | PhyloLevel : levels of phylomemy on a synchronic axis
299 -- Levels description:
300 -- Level 0: The foundations and the base of the phylo
301 -- Level 1: First level of clustering (the Fis)
302 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
303 data PhyloLevel =
304 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
305 , _phylo_levelLevel :: Level
306 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
307 }
308 deriving (Generic, Show, Eq)
309
310
311 type PhyloGroupId = (PhyloLevelId, Int)
312
313 -- | BranchId : (a level, a sequence of branch index)
314 -- the sequence is a path of heritage from the most to the less specific branch
315 type PhyloBranchId = (Level, [Int])
316
317 -- | PhyloGroup : group of ngrams at each level and period
318 data PhyloGroup =
319 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
320 , _phylo_groupLevel :: Level
321 , _phylo_groupIndex :: Int
322 , _phylo_groupLabel :: Text
323 , _phylo_groupSupport :: Support
324 , _phylo_groupNgrams :: [Int]
325 , _phylo_groupCooc :: !(Cooc)
326 , _phylo_groupBranchId :: PhyloBranchId
327 , _phylo_groupMeta :: Map Text [Double]
328 , _phylo_groupLevelParents :: [Pointer]
329 , _phylo_groupLevelChilds :: [Pointer]
330 , _phylo_groupPeriodParents :: [Pointer]
331 , _phylo_groupPeriodChilds :: [Pointer]
332 , _phylo_groupAncestors :: [Pointer]
333 }
334 deriving (Generic, Show, Eq, NFData)
335
336 -- | Weight : A generic mesure that can be associated with an Id
337 type Weight = Double
338
339 -- | Pointer : A weighted pointer to a given PhyloGroup
340 type Pointer = (PhyloGroupId, Weight)
341
342 data Filiation = ToParents | ToChilds deriving (Generic, Show)
343 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
344
345
346 ----------------------
347 -- | Phylo Clique | --
348 ----------------------
349
350 -- | Support : Number of Documents where a Clique occurs
351 type Support = Int
352
353 data PhyloClique = PhyloClique
354 { _phyloClique_nodes :: [Int]
355 , _phyloClique_support :: Support
356 , _phyloClique_period :: (Date,Date)
357 } deriving (Generic,NFData,Show,Eq)
358
359 ----------------
360 -- | Export | --
361 ----------------
362
363 type DotId = TextLazy.Text
364
365 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
366
367 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
368
369 data Order = Asc | Desc deriving (Show,Generic,Eq)
370
371 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
372
373 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
374
375 data PhyloLabel =
376 BranchLabel
377 { _branch_labelTagger :: Tagger
378 , _branch_labelSize :: Int }
379 | GroupLabel
380 { _group_labelTagger :: Tagger
381 , _group_labelSize :: Int }
382 deriving (Show,Generic,Eq)
383
384 data PhyloBranch =
385 PhyloBranch
386 { _branch_id :: PhyloBranchId
387 , _branch_canonId :: [Int]
388 , _branch_seaLevel :: [Double]
389 , _branch_x :: Double
390 , _branch_y :: Double
391 , _branch_w :: Double
392 , _branch_t :: Double
393 , _branch_label :: Text
394 , _branch_meta :: Map Text [Double]
395 } deriving (Generic, Show, Eq)
396
397 data PhyloExport =
398 PhyloExport
399 { _export_groups :: [PhyloGroup]
400 , _export_branches :: [PhyloBranch]
401 } deriving (Generic, Show)
402
403 ----------------
404 -- | Lenses | --
405 ----------------
406
407 makeLenses ''Config
408 makeLenses ''Proximity
409 makeLenses ''SeaElevation
410 makeLenses ''Quality
411 makeLenses ''Clique
412 makeLenses ''PhyloLabel
413 makeLenses ''TimeUnit
414 makeLenses ''PhyloFoundations
415 makeLenses ''PhyloClique
416 makeLenses ''Phylo
417 makeLenses ''PhyloPeriod
418 makeLenses ''PhyloLevel
419 makeLenses ''PhyloGroup
420 makeLenses ''PhyloParam
421 makeLenses ''PhyloExport
422 makeLenses ''PhyloBranch
423
424 ------------------------
425 -- | JSON instances | --
426 ------------------------
427
428
429 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)