]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
export done
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
28
29 module Gargantext.Viz.AdaptativePhylo where
30
31 import Data.Aeson
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
35 import Data.Map (Map)
36 import Data.Set (Set)
37
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
41
42 import GHC.Generics
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
46
47 import qualified Data.Text.Lazy as TextLazy
48
49
50 ----------------
51 -- | Config | --
52 ----------------
53
54
55 data CorpusParser =
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
59
60
61 data Proximity =
62 WeightedLogJaccard
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
66 | Hamming
67 deriving (Show,Generic,Eq)
68
69
70 data TimeUnit =
71 Year
72 { _year_period :: Int
73 , _year_step :: Int
74 , _year_matchingFrame :: Int }
75 deriving (Show,Generic,Eq)
76
77
78 data ContextualUnit =
79 Fis
80 { _fis_support :: Int
81 , _fis_size :: Int }
82 deriving (Show,Generic,Eq)
83
84
85 data Config =
86 Config { corpusPath :: FilePath
87 , listPath :: FilePath
88 , outputPath :: FilePath
89 , corpusParser :: CorpusParser
90 , phyloName :: Text
91 , phyloLevel :: Int
92 , phyloProximity :: Proximity
93 , timeUnit :: TimeUnit
94 , contextualUnit :: ContextualUnit
95 , exportLabel :: [PhyloLabel]
96 , exportSort :: Sort
97 , exportFilter :: [Filter]
98 } deriving (Show,Generic,Eq)
99
100
101 defaultConfig :: Config
102 defaultConfig =
103 Config { corpusPath = ""
104 , listPath = ""
105 , outputPath = ""
106 , corpusParser = Csv 1000
107 , phyloName = pack "Default Phylo"
108 , phyloLevel = 1
109 , phyloProximity = WeightedLogJaccard 10 0 0.1
110 , timeUnit = Year 3 1 5
111 , contextualUnit = Fis 2 4
112 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
113 , exportSort = ByHierarchy
114 , exportFilter = [ByBranchSize 2]
115 }
116
117 instance FromJSON Config
118 instance ToJSON Config
119 instance FromJSON CorpusParser
120 instance ToJSON CorpusParser
121 instance FromJSON Proximity
122 instance ToJSON Proximity
123 instance FromJSON TimeUnit
124 instance ToJSON TimeUnit
125 instance FromJSON ContextualUnit
126 instance ToJSON ContextualUnit
127 instance FromJSON PhyloLabel
128 instance ToJSON PhyloLabel
129 instance FromJSON Tagger
130 instance ToJSON Tagger
131 instance FromJSON Sort
132 instance ToJSON Sort
133 instance FromJSON Order
134 instance ToJSON Order
135 instance FromJSON Filter
136 instance ToJSON Filter
137
138
139 -- | Software parameters
140 data Software =
141 Software { _software_name :: Text
142 , _software_version :: Text
143 } deriving (Generic, Show, Eq)
144
145 defaultSoftware :: Software
146 defaultSoftware =
147 Software { _software_name = pack "Gargantext"
148 , _software_version = pack "v4" }
149
150
151 -- | Global parameters of a Phylo
152 data PhyloParam =
153 PhyloParam { _phyloParam_version :: Text
154 , _phyloParam_software :: Software
155 , _phyloParam_config :: Config
156 } deriving (Generic, Show, Eq)
157
158 defaultPhyloParam :: PhyloParam
159 defaultPhyloParam =
160 PhyloParam { _phyloParam_version = pack "v2.adaptative"
161 , _phyloParam_software = defaultSoftware
162 , _phyloParam_config = defaultConfig }
163
164
165 ------------------
166 -- | Document | --
167 ------------------
168
169
170 -- | Date : a simple Integer
171 type Date = Int
172
173 -- | Ngrams : a contiguous sequence of n terms
174 type Ngrams = Text
175
176 -- | Document : a piece of Text linked to a Date
177 data Document = Document
178 { date :: Date
179 , text :: [Ngrams]
180 } deriving (Eq,Show,Generic,NFData)
181
182
183 --------------------
184 -- | Foundation | --
185 --------------------
186
187
188 -- | The Foundations of a Phylo created from a given TermList
189 data PhyloFoundations = PhyloFoundations
190 { _foundations_roots :: !(Vector Ngrams)
191 , _foundations_mapList :: TermList
192 } deriving (Generic, Show, Eq)
193
194
195 ---------------------------
196 -- | Coocurency Matrix | --
197 ---------------------------
198
199
200 -- | Cooc : a coocurency matrix between two ngrams
201 type Cooc = Map (Int,Int) Double
202
203
204 -------------------
205 -- | Phylomemy | --
206 -------------------
207
208
209 -- | Phylo datatype of a phylomemy
210 -- foundations : the foundations of the phylo
211 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
212 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
213 -- param : the parameters of the phylomemy (with the user's configuration)
214 -- periods : the temporal steps of a phylomemy
215 data Phylo =
216 Phylo { _phylo_foundations :: PhyloFoundations
217 , _phylo_timeCooc :: !(Map Date Cooc)
218 , _phylo_timeDocs :: !(Map Date Double)
219 , _phylo_param :: PhyloParam
220 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
221 }
222 deriving (Generic, Show, Eq)
223
224
225 -- | PhyloPeriodId : the id of a given period
226 type PhyloPeriodId = (Date,Date)
227
228 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
229 -- id: tuple (start date, end date) of the temporal step of the phylomemy
230 -- levels: levels of granularity
231 data PhyloPeriod =
232 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
233 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
234 } deriving (Generic, Show, Eq)
235
236
237 -- | Level : a level of clustering
238 type Level = Int
239
240 -- | PhyloLevelId : the id of a level of clustering in a given period
241 type PhyloLevelId = (PhyloPeriodId,Level)
242
243 -- | PhyloLevel : levels of phylomemy on a synchronic axis
244 -- Levels description:
245 -- Level 0: The foundations and the base of the phylo
246 -- Level 1: First level of clustering (the Fis)
247 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
248 data PhyloLevel =
249 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
250 , _phylo_levelLevel :: Level
251 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
252 }
253 deriving (Generic, Show, Eq)
254
255
256 type PhyloGroupId = (PhyloLevelId, Int)
257
258 -- | BranchId : (a level, a sequence of branch index)
259 -- the sequence is a path of heritage from the most to the less specific branch
260 type PhyloBranchId = (Level, [Int])
261
262 -- | PhyloGroup : group of ngrams at each level and period
263 data PhyloGroup =
264 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
265 , _phylo_groupLevel :: Level
266 , _phylo_groupIndex :: Int
267 , _phylo_groupLabel :: Text
268 , _phylo_groupSupport :: Support
269 , _phylo_groupNgrams :: [Int]
270 , _phylo_groupCooc :: !(Cooc)
271 , _phylo_groupBranchId :: PhyloBranchId
272 , _phylo_groupMeta :: Map Text [Double]
273 , _phylo_groupLevelParents :: [Pointer]
274 , _phylo_groupLevelChilds :: [Pointer]
275 , _phylo_groupPeriodParents :: [Pointer]
276 , _phylo_groupPeriodChilds :: [Pointer]
277 }
278 deriving (Generic, Show, Eq)
279
280 -- | Weight : A generic mesure that can be associated with an Id
281 type Weight = Double
282
283 -- | Pointer : A weighted pointer to a given PhyloGroup
284 type Pointer = (PhyloGroupId, Weight)
285
286 data Filiation = ToParents | ToChilds deriving (Generic, Show)
287 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
288
289
290 ---------------------------
291 -- | Frequent Item Set | --
292 ---------------------------
293
294 -- | Clique : Set of ngrams cooccurring in the same Document
295 type Clique = Set Ngrams
296
297 -- | Support : Number of Documents where a Clique occurs
298 type Support = Int
299
300 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
301 data PhyloFis = PhyloFis
302 { _phyloFis_clique :: Clique
303 , _phyloFis_support :: Support
304 , _phyloFis_period :: (Date,Date)
305 } deriving (Generic,NFData,Show,Eq)
306
307
308 ----------------
309 -- | Export | --
310 ----------------
311
312 type DotId = TextLazy.Text
313
314 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
315
316 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
317
318 data Order = Asc | Desc deriving (Show,Generic,Eq)
319
320 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
321
322 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
323
324 data PhyloLabel =
325 BranchLabel
326 { _branch_labelTagger :: Tagger
327 , _branch_labelSize :: Int }
328 | GroupLabel
329 { _group_labelTagger :: Tagger
330 , _group_labelSize :: Int }
331 deriving (Show,Generic,Eq)
332
333 data PhyloBranch =
334 PhyloBranch
335 { _branch_id :: PhyloBranchId
336 , _branch_label :: Text
337 , _branch_meta :: Map Text [Double]
338 } deriving (Generic, Show)
339
340 data PhyloExport =
341 PhyloExport
342 { _export_groups :: [PhyloGroup]
343 , _export_branches :: [PhyloBranch]
344 } deriving (Generic, Show)
345
346 ----------------
347 -- | Lenses | --
348 ----------------
349
350 makeLenses ''Config
351 makeLenses ''Proximity
352 makeLenses ''ContextualUnit
353 makeLenses ''PhyloLabel
354 makeLenses ''TimeUnit
355 makeLenses ''PhyloFoundations
356 makeLenses ''PhyloFis
357 makeLenses ''Phylo
358 makeLenses ''PhyloPeriod
359 makeLenses ''PhyloLevel
360 makeLenses ''PhyloGroup
361 makeLenses ''PhyloParam
362 makeLenses ''PhyloExport
363 makeLenses ''PhyloBranch
364
365 ------------------------
366 -- | JSON instances | --
367 ------------------------
368
369
370 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)