]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
add the dynamics and the labels
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
28
29 module Gargantext.Viz.AdaptativePhylo where
30
31 import Data.Aeson
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
35 import Data.Map (Map)
36 import Data.Set (Set)
37
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
41
42 import GHC.Generics
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
46
47 import qualified Data.Text.Lazy as TextLazy
48
49
50 ----------------
51 -- | Config | --
52 ----------------
53
54
-- | CorpusParser : the corpus format selected in the configuration.
-- Both constructors carry a single 'Int' limit.
-- NOTE(review): the limit presumably bounds the number of parsed documents;
-- confirm against the parsing code.
data CorpusParser
  = Wos { _wos_limit :: Int }
  | Csv { _csv_limit :: Int }
  deriving (Eq, Generic, Show)
59
60
-- | Proximity : the proximity measure selected in the configuration.
--
-- 'WeightedLogJaccard' carries three 'Double' parameters, while 'Hamming'
-- carries none; the @_wlj_*@ selectors are therefore partial (they fail on
-- 'Hamming').
data Proximity
  = WeightedLogJaccard
      { _wlj_sensibility   :: Double
      , _wlj_thresholdInit :: Double
      , _wlj_thresholdStep :: Double
      }
  | Hamming
  deriving (Eq, Generic, Show)
68
69
-- | TimeUnit : the temporal unit selected in the configuration.
-- Only 'Year' exists, parameterised by three 'Int' values
-- (period, step and matching frame).
data TimeUnit = Year
  { _year_period        :: Int
  , _year_step          :: Int
  , _year_matchingFrame :: Int
  }
  deriving (Eq, Generic, Show)
76
77
-- | ContextualUnit : the contextual unit selected in the configuration.
-- Only 'Fis' exists, parameterised by a support and a size (both 'Int').
data ContextualUnit = Fis
  { _fis_support :: Int
  , _fis_size    :: Int
  }
  deriving (Eq, Generic, Show)
83
84
-- | Config : the user's configuration of a phylo.
--
-- Unlike the other records of this module, the fields are not prefixed with
-- an underscore.
-- NOTE(review): the per-field remarks are inferred from names and types
-- only; confirm against the code consuming the configuration.
data Config = Config
  { corpusPath     :: FilePath        -- ^ path related to the corpus
  , listPath       :: FilePath        -- ^ path related to the list
  , outputPath     :: FilePath        -- ^ path related to the output
  , corpusParser   :: CorpusParser    -- ^ corpus format and its limit
  , phyloName      :: Text            -- ^ name of the phylo
  , phyloLevel     :: Int             -- ^ a level, as a plain 'Int'
  , phyloProximity :: Proximity       -- ^ proximity measure and its parameters
  , timeUnit       :: TimeUnit        -- ^ temporal unit and its parameters
  , contextualUnit :: ContextualUnit  -- ^ contextual unit and its parameters
  , exportLabel    :: [Label]         -- ^ labels requested for the export
  , branchSize     :: Int             -- ^ a branch size, as a plain 'Int'
  }
  deriving (Eq, Generic, Show)
98
99
-- | The default 'Config': empty paths, a Csv parser limited to 1000,
-- a 'WeightedLogJaccard' proximity, a yearly time unit and Fis contexts.
defaultConfig :: Config
defaultConfig = Config
  { corpusPath     = ""
  , listPath       = ""
  , outputPath     = ""
  , corpusParser   = Csv { _csv_limit = 1000 }
  , phyloName      = pack "Default Phylo"
  , phyloLevel     = 2
  , phyloProximity = WeightedLogJaccard
      { _wlj_sensibility   = 10
      , _wlj_thresholdInit = 0
      , _wlj_thresholdStep = 0.2
      }
  , timeUnit       = Year
      { _year_period        = 3
      , _year_step          = 1
      , _year_matchingFrame = 5
      }
  , contextualUnit = Fis { _fis_support = 2, _fis_size = 4 }
  , exportLabel    =
      [ BranchLabel { _branch_labelTagger = MostInclusive
                    , _branch_labelSize   = 2 }
      , GroupLabel  { _group_labelTagger = MostEmergentInclusive
                    , _group_labelSize   = 2 }
      ]
  , branchSize     = 3
  }
114
-- JSON instances of the configuration types.
-- Every instance has an empty body, so each one relies on the default
-- methods of its class; all the types involved derive 'Generic'.

-- The configuration record itself
instance ToJSON   Config
instance FromJSON Config

-- Its parameter types
instance ToJSON   CorpusParser
instance FromJSON CorpusParser
instance ToJSON   Proximity
instance FromJSON Proximity
instance ToJSON   TimeUnit
instance FromJSON TimeUnit
instance ToJSON   ContextualUnit
instance FromJSON ContextualUnit

-- The export labels and their taggers
instance ToJSON   Label
instance FromJSON Label
instance ToJSON   Tagger
instance FromJSON Tagger
129
130
-- | Software : name and version of the software, both kept as 'Text'.
data Software = Software
  { _software_name    :: Text
  , _software_version :: Text
  }
  deriving (Eq, Generic, Show)
136
-- | The default 'Software' : "Gargantext", version "v4".
defaultSoftware :: Software
defaultSoftware = Software softwareName softwareVersion
  where
    softwareName    = pack "Gargantext"
    softwareVersion = pack "v4"
141
142
-- | PhyloParam : the global parameters of a Phylo, i.e. a version tag,
-- the 'Software' description and the user's 'Config'.
data PhyloParam = PhyloParam
  { _phyloParam_version  :: Text
  , _phyloParam_software :: Software
  , _phyloParam_config   :: Config
  }
  deriving (Eq, Generic, Show)
149
-- | The default 'PhyloParam' : version "v2.adaptative" combined with
-- 'defaultSoftware' and 'defaultConfig'.
defaultPhyloParam :: PhyloParam
defaultPhyloParam = PhyloParam paramVersion defaultSoftware defaultConfig
  where
    paramVersion = pack "v2.adaptative"
155
156
157 ------------------
158 -- | Document | --
159 ------------------
160
161
-- | Date : a plain 'Int' (a fixed-size integer, not an 'Integer')
type Date = Int

-- | Ngrams : a contiguous sequence of n terms, stored as a single 'Text'
type Ngrams = Text
167
-- | Document : a list of 'Ngrams' linked to a 'Date'
data Document = Document
  { date :: Date      -- ^ the date the document is linked to
  , text :: [Ngrams]  -- ^ the content, as a list of ngrams
  }
  deriving (Eq, Generic, NFData, Show)
173
174
175 --------------------
176 -- | Foundation | --
177 --------------------
178
179
-- | PhyloFoundations : the foundations of a Phylo, created from a given
-- 'TermList'. The roots are held in a strict field; the term list is lazy.
data PhyloFoundations = PhyloFoundations
  { _foundations_roots   :: !(Vector Ngrams)
  , _foundations_mapList :: TermList
  }
  deriving (Eq, Generic, Show)
185
186
-----------------------------
-- | Cooccurrence Matrix | --
-----------------------------


-- | Cooc : a cooccurrence matrix, stored sparsely as a 'Map' from a pair of
-- 'Int' to a 'Double'.
-- NOTE(review): the two 'Int' of a key presumably index the ngrams of the
-- pair; confirm against the code building the matrix.
type Cooc = Map (Int, Int) Double
194
195
196 -------------------
197 -- | Phylomemy | --
198 -------------------
199
200
-- | Phylo : the datatype of a phylomemy.
--
--   * foundations : the foundations of the phylo
--   * timeCooc    : a Map of cooccurrences by minimal unit of time (ex: by year)
--   * timeDocs    : a Map with the numbers of docs by minimal unit of time (ex: by year)
--   * param       : the parameters of the phylomemy (with the user's configuration)
--   * periods     : the temporal steps of a phylomemy
--
-- Only the two time-indexed maps are strict fields.
data Phylo = Phylo
  { _phylo_foundations :: PhyloFoundations
  , _phylo_timeCooc    :: !(Map Date Cooc)
  , _phylo_timeDocs    :: !(Map Date Double)
  , _phylo_param       :: PhyloParam
  , _phylo_periods     :: Map PhyloPeriodId PhyloPeriod
  }
  deriving (Eq, Generic, Show)
215
216
-- | PhyloPeriodId : the id of a given period, as a pair of 'Date'
type PhyloPeriodId = (Date, Date)
219
-- | PhyloPeriod : a step of a phylomemy on the temporal axis.
--
--   * period : tuple (start date, end date) of the temporal step
--   * levels : the levels of granularity, indexed by their id
data PhyloPeriod = PhyloPeriod
  { _phylo_periodPeriod :: (Date, Date)
  , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
  }
  deriving (Eq, Generic, Show)
227
228
-- | Level : a level of clustering, as a plain 'Int'
type Level = Int

-- | PhyloLevelId : the id of a level of clustering in a given period,
-- i.e. the pair (period id, level)
type PhyloLevelId = (PhyloPeriodId, Level)
234
-- | PhyloLevel : a level of the phylomemy on the synchronic axis.
--
-- Levels description:
--
--   * Level 0      : the foundations and the base of the phylo
--   * Level 1      : first level of clustering (the Fis)
--   * Level [2..N] : Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel = PhyloLevel
  { _phylo_levelPeriod :: (Date, Date)
  , _phylo_levelLevel  :: Level
  , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
  }
  deriving (Eq, Generic, Show)
246
247
-- | PhyloGroupId : the id of a group, i.e. the pair (level id, 'Int')
type PhyloGroupId = (PhyloLevelId, Int)

-- | PhyloBranchId : (a level, a sequence of branch indexes).
-- The sequence is a path of heritage from the most to the least specific
-- branch.
type PhyloBranchId = (Level, [Int])
253
-- | PhyloGroup : a group of ngrams at a given level and period.
--
-- Besides its own description (period, level, index, label, support,
-- ngrams, cooccurrences, branch and meta data) a group holds four lists of
-- weighted 'Pointer' : parents and childs between levels, parents and
-- childs between periods. The cooccurrence matrix is the only strict field.
data PhyloGroup = PhyloGroup
  { _phylo_groupPeriod        :: (Date, Date)
  , _phylo_groupLevel         :: Level
  , _phylo_groupIndex         :: Int
  , _phylo_groupLabel         :: Text
  , _phylo_groupSupport       :: Support
  , _phylo_groupNgrams        :: [Int]
  , _phylo_groupCooc          :: !Cooc
  , _phylo_groupBranchId      :: PhyloBranchId
  , _phylo_groupMeta          :: Map Text [Double]
    -- pointers between levels
  , _phylo_groupLevelParents  :: [Pointer]
  , _phylo_groupLevelChilds   :: [Pointer]
    -- pointers between periods
  , _phylo_groupPeriodParents :: [Pointer]
  , _phylo_groupPeriodChilds  :: [Pointer]
  }
  deriving (Eq, Generic, Show)
271
-- | Weight : a generic measure that can be associated with an id
type Weight = Double

-- | Pointer : a weighted pointer to a given PhyloGroup,
-- i.e. the pair (group id, weight)
type Pointer = (PhyloGroupId, Weight)
277
-- | Filiation : a direction of filiation, either towards the parents or
-- towards the childs.
-- Derives 'Eq' like the other enumerations of this module, so that values
-- can be compared with @==@.
data Filiation
  = ToParents
  | ToChilds
  deriving (Eq, Generic, Show)

-- | PointerType : the kind of a pointer, either temporal or between levels.
-- Derives 'Eq' for the same reason as 'Filiation'.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Eq, Generic, Show)
280
281
282 ---------------------------
283 -- | Frequent Item Set | --
284 ---------------------------
285
-- | Clique : a 'Set' of ngrams cooccurring in the same Document
type Clique = Set Ngrams

-- | Support : the number of Documents where a Clique occurs
type Support = Int
291
-- | PhyloFis : a Frequent Item Set, i.e. the association between a 'Clique'
-- and a 'Support', attached to a period given as a pair of 'Date'.
data PhyloFis = PhyloFis
  { _phyloFis_clique  :: Clique
  , _phyloFis_support :: Support
  , _phyloFis_period  :: (Date, Date)
  }
  deriving (Eq, Generic, NFData, Show)
298
299
300 ----------------
301 -- | Export | --
302 ----------------
303
-- | DotId : an identifier used by the export, stored as a lazy 'TextLazy.Text'
type DotId = TextLazy.Text
305
-- | Tagger : the tagging strategy carried by a 'Label'
data Tagger
  = MostInclusive
  | MostEmergentInclusive
  deriving (Eq, Generic, Show)
307
-- | Label : a label requested for the export, either for the branches or
-- for the groups. Both constructors carry a 'Tagger' and an 'Int' size,
-- under constructor-specific field names.
data Label
  = BranchLabel
      { _branch_labelTagger :: Tagger
      , _branch_labelSize   :: Int
      }
  | GroupLabel
      { _group_labelTagger :: Tagger
      , _group_labelSize   :: Int
      }
  deriving (Eq, Generic, Show)
316
-- | PhyloBranch : a branch of the export, made of its id and a 'Text' label
data PhyloBranch = PhyloBranch
  { _branch_id    :: PhyloBranchId
  , _branch_label :: Text
  }
  deriving (Generic, Show)
322
-- | PhyloExport : the content of an export, i.e. a list of groups and a
-- list of branches
data PhyloExport = PhyloExport
  { _export_groups   :: [PhyloGroup]
  , _export_branches :: [PhyloBranch]
  }
  deriving (Generic, Show)
328
329 ----------------
330 -- | Lenses | --
331 ----------------
332
-- Lenses of the configuration types.
-- NOTE(review): the fields of Config carry no leading underscore, so under
-- the default rules of makeLenses its splice should generate nothing;
-- confirm whether lenses on Config were intended.
makeLenses ''Config
makeLenses ''Proximity
makeLenses ''TimeUnit
makeLenses ''ContextualUnit
makeLenses ''Label

-- Lenses of the phylo itself, from the parameters down to the groups.
makeLenses ''PhyloParam
makeLenses ''PhyloFoundations
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
makeLenses ''PhyloGroup
makeLenses ''PhyloFis

-- Lenses of the export types.
makeLenses ''PhyloExport
makeLenses ''PhyloBranch
347
348 ------------------------
349 -- | JSON instances | --
350 ------------------------
351
352
-- JSON instances of PhyloFoundations, derived through Template Haskell with
-- the options built by unPrefix from the "_foundations_" field prefix.
deriveJSON (unPrefix "_foundations_") ''PhyloFoundations