]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
add export logs
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
28
29 module Gargantext.Viz.AdaptativePhylo where
30
31 import Data.Aeson
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
35 import Data.Map (Map)
36 import Data.Set (Set)
37
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
41
42 import GHC.Generics
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
46
47 import qualified Data.Text.Lazy as TextLazy
48
49
50 ----------------
51 -- | Config | --
52 ----------------
53
54
55 data CorpusParser =
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
59
60
61 data Proximity =
62 WeightedLogJaccard
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
66 | Hamming
67 deriving (Show,Generic,Eq)
68
69
70 data Synchrony =
71 ByProximityThreshold
72 { _bpt_threshold :: Double
73 , _bpt_sensibility :: Double}
74 | ByProximityDistribution
75 { _bpd_sensibility :: Double}
76 deriving (Show,Generic,Eq)
77
78
79 data TimeUnit =
80 Year
81 { _year_period :: Int
82 , _year_step :: Int
83 , _year_matchingFrame :: Int }
84 deriving (Show,Generic,Eq)
85
86
87 data ContextualUnit =
88 Fis
89 { _fis_support :: Int
90 , _fis_size :: Int }
91 | MaxClique
92 { _clique_size :: Int }
93 deriving (Show,Generic,Eq)
94
95
96 data Quality =
97 Quality { _qua_granularity :: Double
98 , _qua_minBranch :: Int }
99 deriving (Show,Generic,Eq)
100
101
102 data Config =
103 Config { corpusPath :: FilePath
104 , listPath :: FilePath
105 , outputPath :: FilePath
106 , corpusParser :: CorpusParser
107 , phyloName :: Text
108 , phyloLevel :: Int
109 , phyloProximity :: Proximity
110 , phyloSynchrony :: Synchrony
111 , phyloQuality :: Quality
112 , timeUnit :: TimeUnit
113 , contextualUnit :: ContextualUnit
114 , exportLabel :: [PhyloLabel]
115 , exportSort :: Sort
116 , exportFilter :: [Filter]
117 } deriving (Show,Generic,Eq)
118
119
120 defaultConfig :: Config
121 defaultConfig =
122 Config { corpusPath = ""
123 , listPath = ""
124 , outputPath = ""
125 , corpusParser = Csv 1000
126 , phyloName = pack "Default Phylo"
127 , phyloLevel = 2
128 , phyloProximity = WeightedLogJaccard 10 0 0.1
129 , phyloSynchrony = ByProximityDistribution 0
130 , phyloQuality = Quality 0.5 1
131 , timeUnit = Year 3 1 5
132 , contextualUnit = Fis 1 5
133 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
134 , exportSort = ByHierarchy
135 , exportFilter = [ByBranchSize 2]
136 }
137
138 instance FromJSON Config
139 instance ToJSON Config
140 instance FromJSON CorpusParser
141 instance ToJSON CorpusParser
142 instance FromJSON Proximity
143 instance ToJSON Proximity
144 instance FromJSON TimeUnit
145 instance ToJSON TimeUnit
146 instance FromJSON ContextualUnit
147 instance ToJSON ContextualUnit
148 instance FromJSON PhyloLabel
149 instance ToJSON PhyloLabel
150 instance FromJSON Tagger
151 instance ToJSON Tagger
152 instance FromJSON Sort
153 instance ToJSON Sort
154 instance FromJSON Order
155 instance ToJSON Order
156 instance FromJSON Filter
157 instance ToJSON Filter
158 instance FromJSON Synchrony
159 instance ToJSON Synchrony
160 instance FromJSON Quality
161 instance ToJSON Quality
162
163
164 -- | Software parameters
165 data Software =
166 Software { _software_name :: Text
167 , _software_version :: Text
168 } deriving (Generic, Show, Eq)
169
170 defaultSoftware :: Software
171 defaultSoftware =
172 Software { _software_name = pack "Gargantext"
173 , _software_version = pack "v4" }
174
175
176 -- | Global parameters of a Phylo
177 data PhyloParam =
178 PhyloParam { _phyloParam_version :: Text
179 , _phyloParam_software :: Software
180 , _phyloParam_config :: Config
181 } deriving (Generic, Show, Eq)
182
183 defaultPhyloParam :: PhyloParam
184 defaultPhyloParam =
185 PhyloParam { _phyloParam_version = pack "v2.adaptative"
186 , _phyloParam_software = defaultSoftware
187 , _phyloParam_config = defaultConfig }
188
189
190 ------------------
191 -- | Document | --
192 ------------------
193
194
195 -- | Date : a simple Integer
196 type Date = Int
197
198 -- | Ngrams : a contiguous sequence of n terms
199 type Ngrams = Text
200
201 -- | Document : a piece of Text linked to a Date
202 data Document = Document
203 { date :: Date
204 , text :: [Ngrams]
205 } deriving (Eq,Show,Generic,NFData)
206
207
208 --------------------
209 -- | Foundation | --
210 --------------------
211
212
213 -- | The Foundations of a Phylo created from a given TermList
214 data PhyloFoundations = PhyloFoundations
215 { _foundations_roots :: !(Vector Ngrams)
216 , _foundations_mapList :: TermList
217 } deriving (Generic, Show, Eq)
218
219
220 ---------------------------
221 -- | Coocurency Matrix | --
222 ---------------------------
223
224
225 -- | Cooc : a coocurency matrix between two ngrams
226 type Cooc = Map (Int,Int) Double
227
228
229 -------------------
230 -- | Phylomemy | --
231 -------------------
232
233
234 -- | Phylo datatype of a phylomemy
235 -- foundations : the foundations of the phylo
236 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
237 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
238 -- param : the parameters of the phylomemy (with the user's configuration)
239 -- periods : the temporal steps of a phylomemy
240 data Phylo =
241 Phylo { _phylo_foundations :: PhyloFoundations
242 , _phylo_timeCooc :: !(Map Date Cooc)
243 , _phylo_timeDocs :: !(Map Date Double)
244 , _phylo_termFreq :: !(Map Int Double)
245 , _phylo_param :: PhyloParam
246 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
247 }
248 deriving (Generic, Show, Eq)
249
250
251 -- | PhyloPeriodId : the id of a given period
252 type PhyloPeriodId = (Date,Date)
253
254 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
255 -- id: tuple (start date, end date) of the temporal step of the phylomemy
256 -- levels: levels of granularity
257 data PhyloPeriod =
258 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
259 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
260 } deriving (Generic, Show, Eq)
261
262
263 -- | Level : a level of clustering
264 type Level = Int
265
266 -- | PhyloLevelId : the id of a level of clustering in a given period
267 type PhyloLevelId = (PhyloPeriodId,Level)
268
269 -- | PhyloLevel : levels of phylomemy on a synchronic axis
270 -- Levels description:
271 -- Level 0: The foundations and the base of the phylo
272 -- Level 1: First level of clustering (the Fis)
273 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
274 data PhyloLevel =
275 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
276 , _phylo_levelLevel :: Level
277 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
278 }
279 deriving (Generic, Show, Eq)
280
281
282 type PhyloGroupId = (PhyloLevelId, Int)
283
284 -- | BranchId : (a level, a sequence of branch index)
285 -- the sequence is a path of heritage from the most to the less specific branch
286 type PhyloBranchId = (Level, [Int])
287
288 -- | PhyloGroup : group of ngrams at each level and period
289 data PhyloGroup =
290 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
291 , _phylo_groupLevel :: Level
292 , _phylo_groupIndex :: Int
293 , _phylo_groupLabel :: Text
294 , _phylo_groupSupport :: Support
295 , _phylo_groupNgrams :: [Int]
296 , _phylo_groupCooc :: !(Cooc)
297 , _phylo_groupBranchId :: PhyloBranchId
298 , _phylo_groupMeta :: Map Text [Double]
299 , _phylo_groupLevelParents :: [Pointer]
300 , _phylo_groupLevelChilds :: [Pointer]
301 , _phylo_groupPeriodParents :: [Pointer]
302 , _phylo_groupPeriodChilds :: [Pointer]
303 }
304 deriving (Generic, Show, Eq, NFData)
305
306 -- | Weight : A generic mesure that can be associated with an Id
307 type Weight = Double
308
309 -- | Pointer : A weighted pointer to a given PhyloGroup
310 type Pointer = (PhyloGroupId, Weight)
311
312 data Filiation = ToParents | ToChilds deriving (Generic, Show)
313 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
314
315
316 ---------------------------
317 -- | Frequent Item Set | --
318 ---------------------------
319
320 -- | Support : Number of Documents where a Clique occurs
321 type Support = Int
322
323 data PhyloCUnit = PhyloCUnit
324 { _phyloCUnit_nodes :: Set Ngrams
325 , _phyloCUnit_support :: Support
326 , _phyloCUnit_period :: (Date,Date)
327 } deriving (Generic,NFData,Show,Eq)
328
329
330 ----------------
331 -- | Export | --
332 ----------------
333
334 type DotId = TextLazy.Text
335
336 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
337
338 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
339
340 data Order = Asc | Desc deriving (Show,Generic,Eq)
341
342 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
343
344 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
345
346 data PhyloLabel =
347 BranchLabel
348 { _branch_labelTagger :: Tagger
349 , _branch_labelSize :: Int }
350 | GroupLabel
351 { _group_labelTagger :: Tagger
352 , _group_labelSize :: Int }
353 deriving (Show,Generic,Eq)
354
355 data PhyloBranch =
356 PhyloBranch
357 { _branch_id :: PhyloBranchId
358 , _branch_label :: Text
359 , _branch_meta :: Map Text [Double]
360 } deriving (Generic, Show)
361
362 data PhyloExport =
363 PhyloExport
364 { _export_groups :: [PhyloGroup]
365 , _export_branches :: [PhyloBranch]
366 } deriving (Generic, Show)
367
368 ----------------
369 -- | Lenses | --
370 ----------------
371
372 makeLenses ''Config
373 makeLenses ''Proximity
374 makeLenses ''Quality
375 makeLenses ''ContextualUnit
376 makeLenses ''PhyloLabel
377 makeLenses ''TimeUnit
378 makeLenses ''PhyloFoundations
379 makeLenses ''PhyloCUnit
380 makeLenses ''Phylo
381 makeLenses ''PhyloPeriod
382 makeLenses ''PhyloLevel
383 makeLenses ''PhyloGroup
384 makeLenses ''PhyloParam
385 makeLenses ''PhyloExport
386 makeLenses ''PhyloBranch
387
388 ------------------------
389 -- | JSON instances | --
390 ------------------------
391
392
393 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)