]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
temporal is close to be ok, start export
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
14
15 The main type is Phylo, which is a synonym of Phylomemy (the only difference
16 is the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
28
29 module Gargantext.Viz.AdaptativePhylo where
30
31 import Data.Aeson
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
35 import Data.Map (Map)
36 import Data.Set (Set)
37
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
41
42 import GHC.Generics
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
46
47 import qualified Data.Text.Lazy as TextLazy
48
49
50 ----------------
51 -- | Config | --
52 ----------------
53
54
-- | Corpus input formats. Each constructor carries a single 'Int' limit.
-- NOTE(review): the limit is presumably a cap on the number of parsed
-- documents — confirm against the parser that consumes it.
data CorpusParser
  = Wos { _wos_limit :: Int }  -- ^ limit attached to a @Wos@ corpus
  | Csv { _csv_limit :: Int }  -- ^ limit attached to a @Csv@ corpus
  deriving (Show, Generic, Eq)
59
60
-- | Proximity measure selected in the configuration: either a weighted
-- log-Jaccard carrying three 'Double' parameters, or a parameterless Hamming.
-- NOTE(review): the exact role of sensibility / thresholdInit / thresholdStep
-- is defined by the code consuming this type, not here — confirm there.
data Proximity
  = WeightedLogJaccard
      { _wlj_sensibility   :: Double
      , _wlj_thresholdInit :: Double
      , _wlj_thresholdStep :: Double
      }
  | Hamming
  deriving (Show, Generic, Eq)
68
69
-- | Temporal unit of the configuration. Only 'Year' exists; it carries three
-- 'Int' parameters: a period, a step and a matching frame.
-- NOTE(review): presumably all three are expressed in years — confirm.
data TimeUnit
  = Year
      { _year_period        :: Int
      , _year_step          :: Int
      , _year_matchingFrame :: Int
      }
  deriving (Show, Generic, Eq)
76
77
-- | Contextual unit of the configuration. Only 'Fis' exists; it carries an
-- 'Int' support and an 'Int' size.
-- NOTE(review): presumably minimal thresholds applied to frequent item sets
-- — confirm against the code that filters them.
data ContextualUnit
  = Fis
      { _fis_support :: Int
      , _fis_size    :: Int
      }
  deriving (Show, Generic, Eq)
83
84
-- | User configuration of a phylo: three file paths, the corpus parser, a
-- name, a level, and the proximity / time / contextual settings, plus a
-- branch size.
data Config = Config
  { corpusPath     :: FilePath        -- ^ path of the corpus
  , listPath       :: FilePath        -- ^ path of the list
  , outputPath     :: FilePath        -- ^ path of the output
  , corpusParser   :: CorpusParser    -- ^ corpus format and its limit
  , phyloName      :: Text            -- ^ name of the phylo
  , phyloLevel     :: Int             -- ^ level of the phylo
  , phyloProximity :: Proximity       -- ^ proximity measure and its parameters
  , timeUnit       :: TimeUnit        -- ^ temporal unit and its parameters
  , contextualUnit :: ContextualUnit  -- ^ contextual unit and its parameters
  , branchSize     :: Int             -- ^ NOTE(review): presumably a minimal branch size — confirm
  }
  deriving (Show, Generic, Eq)
97
98
-- | Default 'Config': empty paths, a Csv parser limited to 1000, the name
-- "Default Phylo", level 2, a weighted log-Jaccard proximity (10, 0, 0.2),
-- a yearly time unit (3, 1, 5), a Fis contextual unit (2, 4) and a branch
-- size of 3.
defaultConfig :: Config
defaultConfig = Config
  { corpusPath     = ""
  , listPath       = ""
  , outputPath     = ""
  , corpusParser   = Csv 1000
  , phyloName      = pack "Default Phylo"
  , phyloLevel     = 2
  , phyloProximity = WeightedLogJaccard 10 0 0.2
  , timeUnit       = Year 3 1 5
  , contextualUnit = Fis 2 4
  , branchSize     = 3
  }
112
-- JSON instances of the configuration types. Every instance body is empty,
-- so the class default methods are used (in aeson these defaults go through
-- the 'Generic' instance each of these types derives).

instance FromJSON CorpusParser
instance ToJSON   CorpusParser

instance FromJSON Proximity
instance ToJSON   Proximity

instance FromJSON TimeUnit
instance ToJSON   TimeUnit

instance FromJSON ContextualUnit
instance ToJSON   ContextualUnit

instance FromJSON Config
instance ToJSON   Config
123
124
-- | Description of the software producing a phylo: a name and a version,
-- both stored as 'Text'.
data Software = Software
  { _software_name    :: Text
  , _software_version :: Text
  }
  deriving (Generic, Show, Eq)
130
-- | Default 'Software' description: "Gargantext", version "v4".
defaultSoftware :: Software
defaultSoftware = Software
  { _software_name    = pack "Gargantext"
  , _software_version = pack "v4"
  }
135
136
-- | Global parameters attached to a phylo: a version tag, the producing
-- software and the user configuration.
data PhyloParam = PhyloParam
  { _phyloParam_version  :: Text
  , _phyloParam_software :: Software
  , _phyloParam_config   :: Config
  }
  deriving (Generic, Show, Eq)
143
-- | Default 'PhyloParam': version "v2.adaptative" combined with
-- 'defaultSoftware' and 'defaultConfig'.
defaultPhyloParam :: PhyloParam
defaultPhyloParam = PhyloParam
  { _phyloParam_version  = pack "v2.adaptative"
  , _phyloParam_software = defaultSoftware
  , _phyloParam_config   = defaultConfig
  }
149
150
151 ------------------
152 -- | Document | --
153 ------------------
154
155
-- | Date : a plain 'Int' (note: a fixed-width 'Int', not an 'Integer')
type Date = Int

-- | Ngrams : a contiguous sequence of n terms, stored as 'Text'
type Ngrams = Text
161
-- | Document : a list of 'Ngrams' attached to a 'Date'
data Document = Document
  { date :: Date      -- ^ date of the document
  , text :: [Ngrams]  -- ^ ngrams of the document
  }
  deriving (Eq, Show, Generic, NFData)
167
168
169 --------------------
170 -- | Foundation | --
171 --------------------
172
173
-- | Foundations of a phylo, created from a given 'TermList': a strict vector
-- of root ngrams together with the term list itself.
data PhyloFoundations = PhyloFoundations
  { _foundations_roots   :: !(Vector Ngrams)  -- ^ root ngrams (strict field)
  , _foundations_mapList :: TermList          -- ^ the term list
  }
  deriving (Generic, Show, Eq)
179
180
181 ------------------------------
182 -- | Co-occurrence Matrix | --
183 ------------------------------
184
185
-- | Cooc : a co-occurrence matrix, stored as a 'Map' from a pair of 'Int'
-- to a 'Double' weight.
-- NOTE(review): each 'Int' presumably identifies an ngram — confirm.
type Cooc = Map (Int, Int) Double
188
189
190 -------------------
191 -- | Phylomemy | --
192 -------------------
193
194
-- | Phylo : the datatype of a phylomemy.
data Phylo = Phylo
  { _phylo_foundations :: PhyloFoundations
    -- ^ the foundations of the phylo
  , _phylo_timeCooc    :: !(Map Date Cooc)
    -- ^ co-occurrences by minimal unit of time (ex: by year); strict field
  , _phylo_timeDocs    :: !(Map Date Double)
    -- ^ number of docs by minimal unit of time (ex: by year); strict field
  , _phylo_param       :: PhyloParam
    -- ^ the parameters of the phylomemy (with the user's configuration)
  , _phylo_periods     :: Map PhyloPeriodId PhyloPeriod
    -- ^ the temporal steps of the phylomemy
  }
  deriving (Generic, Show, Eq)
209
210
-- | PhyloPeriodId : identifier of a period, a pair of 'Date's
type PhyloPeriodId = (Date, Date)
213
-- | PhyloPeriod : one step of a phylomemy on the temporal axis.
data PhyloPeriod = PhyloPeriod
  { _phylo_periodPeriod :: PhyloPeriodId
    -- ^ tuple (start date, end date) of the temporal step
  , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
    -- ^ levels of granularity
  }
  deriving (Generic, Show, Eq)
221
222
-- | Level : a level of clustering, as an 'Int'
type Level = Int

-- | PhyloLevelId : identifier of a clustering level inside a given period
type PhyloLevelId = (PhyloPeriodId, Level)
228
-- | PhyloLevel : a level of the phylomemy on the synchronic axis.
--
-- Levels description:
--
--   * Level 0: the foundations and the base of the phylo
--   * Level 1: first level of clustering (the Fis)
--   * Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel = PhyloLevel
  { _phylo_levelPeriod :: PhyloPeriodId
  , _phylo_levelLevel  :: Level
  , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
  }
  deriving (Generic, Show, Eq)
240
241
-- | PhyloGroupId : identifier of a group, a 'PhyloLevelId' paired with an 'Int'
type PhyloGroupId = (PhyloLevelId, Int)

-- | PhyloBranchId : (a level, a sequence of branch indexes).
-- The sequence is a path of heritage from the most to the least specific branch.
type PhyloBranchId = (Level, [Int])
247
-- | PhyloGroup : a group of ngrams at a given level and period.
-- NOTE(review): the meaning of each pointer list is inferred from the field
-- names only — confirm against the code that fills them.
data PhyloGroup = PhyloGroup
  { _phylo_groupPeriod        :: PhyloPeriodId
  , _phylo_groupLevel         :: Level
  , _phylo_groupIndex         :: Int
  , _phylo_groupSupport       :: Support
  , _phylo_groupNgrams        :: [Int]          -- ^ ngrams stored as 'Int's
  , _phylo_groupCooc          :: !Cooc          -- ^ strict field
  , _phylo_groupBranchId      :: PhyloBranchId
  , _phylo_groupLevelParents  :: [Pointer]
  , _phylo_groupLevelChilds   :: [Pointer]
  , _phylo_groupPeriodParents :: [Pointer]
  , _phylo_groupPeriodChilds  :: [Pointer]
  , _phylo_groupGhostPointers :: [Pointer]
  }
  deriving (Generic, Show, Eq)
264
-- | Weight : a generic measure that can be associated with an id
type Weight = Double

-- | Pointer : a weighted pointer to a given 'PhyloGroup'
type Pointer = (PhyloGroupId, Weight)

-- | Link : a pair of 'PhyloGroupId's together with a 'Weight'
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
272
-- | Filiation : direction selector with two values, towards parents or
-- towards childs. Derives 'Eq' like every other type of this module so that
-- values can be compared directly.
data Filiation
  = ToParents
  | ToChilds
  deriving (Generic, Show, Eq)

-- | PointerType : kind selector with two values, temporal or level.
-- Derives 'Eq' like every other type of this module.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Generic, Show, Eq)
275
276
277 ---------------------------
278 -- | Frequent Item Set | --
279 ---------------------------
280
-- | Clique : a set of ngrams co-occurring in the same 'Document'
type Clique = Set Ngrams

-- | Support : the number of documents in which a 'Clique' occurs
type Support = Int
286
-- | PhyloFis : a Frequent Item Set, i.e. the association between a 'Clique'
-- and a 'Support', attached to a period.
data PhyloFis = PhyloFis
  { _phyloFis_clique  :: Clique
  , _phyloFis_support :: Support
  , _phyloFis_period  :: PhyloPeriodId
  }
  deriving (Generic, NFData, Show, Eq)
293
294
295 ----------------
296 -- | Export | --
297 ----------------
298
-- | DotId : an identifier used by the export, stored as lazy 'TextLazy.Text'
type DotId = TextLazy.Text
300
301 ----------------
302 -- | Lenses | --
303 ----------------
304
-- Template Haskell splices generating lenses, listed in declaration order of
-- the types. The splices are independent of one another.
--
-- NOTE(review): 'Config' fields carry no leading underscore; with the default
-- rules of 'makeLenses' only underscore-prefixed fields get a lens, so the
-- 'Config' splice presumably generates nothing — confirm it is intended.
-- NOTE(review): 'CorpusParser', 'Software' and 'Document' have no splice
-- here — confirm whether that is deliberate.

-- Configuration types
makeLenses ''Proximity
makeLenses ''TimeUnit
makeLenses ''ContextualUnit
makeLenses ''Config
makeLenses ''PhyloParam

-- Phylo structure
makeLenses ''PhyloFoundations
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
makeLenses ''PhyloGroup

-- Frequent item sets
makeLenses ''PhyloFis
316
317 ------------------------
318 -- | JSON instances | --
319 ------------------------
320
321
-- JSON instances of 'PhyloFoundations', generated by Template Haskell with
-- the options built by @unPrefix "_foundations_"@.
-- NOTE(review): unPrefix presumably strips that prefix from the JSON keys —
-- confirm in Gargantext.Core.Utils.Prefix.
deriveJSON (unPrefix "_foundations_") ''PhyloFoundations