]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
Merge branch 'dev' into dev-phylo
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 A phylomemy can be described as a temporal graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
14
15 The main type is Phylo, which is a synonym of Phylomemy (the only difference is
16 the number of characters).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
28
29 module Gargantext.Viz.AdaptativePhylo where
30
31 import Data.Aeson
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
35 import Data.Map (Map)
36 import Data.Set (Set)
37
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
41
42 import GHC.Generics
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
46
47
48 ----------------
49 -- | Config | --
50 ----------------
51
52
-- | Corpus input format. Each constructor carries a single 'Int' limit
--   (presumably a cap on the number of documents parsed — TODO confirm
--   against the parser that consumes it).
data CorpusParser
  = Wos { _wos_limit :: Int }
  | Csv { _csv_limit :: Int }
  deriving (Eq, Generic, Show)
57
58
-- | Proximity measure selection.
--   'WeightedLogJaccard' carries three 'Double' parameters; 'Hamming' has none.
--   Note that the @_wlj_*@ selectors are partial: applying one to 'Hamming'
--   fails at runtime.
data Proximity
  = WeightedLogJaccard
      { _wlj_sensibility   :: Double
      , _wlj_thresholdInit :: Double
      , _wlj_thresholdStep :: Double
      }
  | Hamming
  deriving (Eq, Generic, Show)
66
67
-- | Temporal unit of the phylo. The only constructor, 'Year', carries three
--   'Int' parameters: a period, a step and a matching frame
--   (presumably all expressed in years — TODO confirm).
data TimeUnit = Year
  { _year_period        :: Int
  , _year_step          :: Int
  , _year_matchingFrame :: Int
  }
  deriving (Eq, Generic, Show)
74
75
-- | Contextual unit of the phylo. The only constructor, 'Fis', carries a
--   support and a size, both 'Int'
--   (presumably minimum thresholds for frequent item sets — TODO confirm).
data ContextualUnit = Fis
  { _fis_support :: Int
  , _fis_size    :: Int
  }
  deriving (Eq, Generic, Show)
81
82
-- | User configuration of a phylo.
--   The field names carry no leading underscore, unlike the other records
--   of this module.
data Config = Config
  { corpusPath     :: FilePath        -- ^ path of the corpus
  , listPath       :: FilePath        -- ^ path of the list (presumably the term list — confirm)
  , outputPath     :: FilePath        -- ^ path of the output
  , corpusParser   :: CorpusParser    -- ^ corpus format and its limit
  , phyloName      :: Text            -- ^ name of the phylo
  , phyloLevel     :: Int             -- ^ level of the phylo
  , phyloProximity :: Proximity       -- ^ proximity measure and its parameters
  , timeUnit       :: TimeUnit        -- ^ temporal unit and its parameters
  , contextualUnit :: ContextualUnit  -- ^ contextual unit and its parameters
  , branchSize     :: Int             -- ^ branch size (presumably a minimum — confirm)
  }
  deriving (Eq, Generic, Show)
95
96
-- | Default 'Config': empty paths, a 'Csv' parser limited to 1000, a
--   'WeightedLogJaccard' proximity, a 'Year' time unit and a 'Fis'
--   contextual unit. Nested records are built with explicit field names so
--   that each literal is labelled.
defaultConfig :: Config
defaultConfig = Config
  { corpusPath     = ""
  , listPath       = ""
  , outputPath     = ""
  , corpusParser   = Csv { _csv_limit = 1000 }
  , phyloName      = pack "Default Phylo"
  , phyloLevel     = 2
  , phyloProximity = WeightedLogJaccard
      { _wlj_sensibility   = 10
      , _wlj_thresholdInit = 0
      , _wlj_thresholdStep = 0.05
      }
  , timeUnit       = Year
      { _year_period        = 3
      , _year_step          = 1
      , _year_matchingFrame = 5
      }
  , contextualUnit = Fis { _fis_support = 2, _fis_size = 4 }
  , branchSize     = 3
  }
110
-- JSON instances of the configuration types.
-- All instance bodies are empty, so aeson's default ('Generic'-based)
-- method implementations are used; instance order is irrelevant.

-- Leaf configuration types
instance ToJSON   CorpusParser
instance FromJSON CorpusParser

instance ToJSON   Proximity
instance FromJSON Proximity

instance ToJSON   TimeUnit
instance FromJSON TimeUnit

instance ToJSON   ContextualUnit
instance FromJSON ContextualUnit

-- Top-level configuration record
instance ToJSON   Config
instance FromJSON Config
121
122
-- | Identification of the software that produced a phylo:
--   a name and a version, both as 'Text'.
data Software = Software
  { _software_name    :: Text
  , _software_version :: Text
  }
  deriving (Eq, Generic, Show)
128
-- | Default 'Software' description: name @Gargantext@, version @v4@.
defaultSoftware :: Software
defaultSoftware = Software softwareName softwareVersion
  where
    softwareName    = pack "Gargantext"
    softwareVersion = pack "v4"
133
134
-- | Global parameters of a 'Phylo': a version tag, the producing
--   'Software' and the user 'Config'.
data PhyloParam = PhyloParam
  { _phyloParam_version  :: Text
  , _phyloParam_software :: Software
  , _phyloParam_config   :: Config
  }
  deriving (Eq, Generic, Show)
141
-- | Default 'PhyloParam': version @v2.adaptative@ combined with
--   'defaultSoftware' and 'defaultConfig'.
defaultPhyloParam :: PhyloParam
defaultPhyloParam = PhyloParam paramVersion defaultSoftware defaultConfig
  where
    paramVersion = pack "v2.adaptative"
147
148
149 ------------------
150 -- | Document | --
151 ------------------
152
153
-- | Date : a plain 'Int' (presumably a year, given the 'Year' time unit — confirm)
type Date = Int

-- | Ngrams : a contiguous sequence of n terms, stored as 'Text'
type Ngrams = Text
159
-- | Document : a list of 'Ngrams' attached to a 'Date'.
--   'NFData' is derived alongside the stock classes (the module enables
--   @DeriveAnyClass@).
data Document = Document
  { date :: Date      -- ^ date of the document
  , text :: [Ngrams]  -- ^ ngrams of the document
  }
  deriving (Eq, Show, Generic, NFData)
165
166
167 --------------------
168 -- | Foundation | --
169 --------------------
170
171
-- | The foundations of a 'Phylo', created from a given 'TermList'.
data PhyloFoundations = PhyloFoundations
  { _foundations_roots   :: !(Vector Ngrams)  -- ^ root ngrams (strict field)
  , _foundations_mapList :: TermList          -- ^ the term list
  }
  deriving (Eq, Generic, Show)
177
178
179 -----------------------------
180 -- | Cooccurrence Matrix | --
181 -----------------------------
182
183
-- | Cooc : a cooccurrence matrix, stored as a 'Map' from a pair of 'Int'
--   (presumably the indices of two ngrams — confirm) to a 'Double' value.
type Cooc = Map (Int, Int) Double
186
187
188 -------------------
189 -- | Phylomemy | --
190 -------------------
191
192
-- | Phylo : the datatype of a phylomemy.
--   The two per-date maps are strict fields; the other fields are lazy.
data Phylo = Phylo
  { _phylo_foundations :: PhyloFoundations
    -- ^ the foundations of the phylo
  , _phylo_timeCooc    :: !(Map Date Cooc)
    -- ^ cooccurrences by minimal unit of time (ex: by year)
  , _phylo_timeDocs    :: !(Map Date Double)
    -- ^ number of docs by minimal unit of time (ex: by year)
  , _phylo_param       :: PhyloParam
    -- ^ the parameters of the phylomemy (with the user's configuration)
  , _phylo_periods     :: Map PhyloPeriodId PhyloPeriod
    -- ^ the temporal steps of the phylomemy
  }
  deriving (Eq, Generic, Show)
207
208
-- | PhyloPeriodId : the id of a period, as a pair of 'Date'
--   (start date, end date).
type PhyloPeriodId = (Date, Date)
211
-- | PhyloPeriod : one step of a phylomemy on the temporal axis.
data PhyloPeriod = PhyloPeriod
  { _phylo_periodPeriod :: (Date, Date)
    -- ^ (start date, end date) of the temporal step
  , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
    -- ^ the levels of granularity of this period
  }
  deriving (Eq, Generic, Show)
219
220
-- | Level : a level of clustering, as an 'Int'
type Level = Int

-- | PhyloLevelId : the id of a level of clustering within a given period,
--   i.e. the period id paired with the 'Level'
type PhyloLevelId = (PhyloPeriodId, Level)
226
-- | PhyloLevel : a level of a phylomemy on the synchronic axis.
--
--   Levels description:
--
--   * Level 0: the foundations and the base of the phylo
--   * Level 1: first level of clustering (the Fis)
--   * Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel = PhyloLevel
  { _phylo_levelPeriod :: (Date, Date)
    -- ^ the period this level belongs to
  , _phylo_levelLevel  :: Level
    -- ^ the level of clustering
  , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
    -- ^ the groups of this level, keyed by group id
  }
  deriving (Eq, Generic, Show)
238
239
-- | PhyloGroupId : the id of a group, i.e. a level id paired with an 'Int'
--   (presumably the index of the group inside its level — confirm)
type PhyloGroupId = (PhyloLevelId, Int)

-- | PhyloBranchId : (a level, a sequence of branch indexes).
--   The sequence is a path of heritage from the most to the least specific branch.
type PhyloBranchId = (Level, [Int])
245
-- | PhyloGroup : a group of ngrams at a given level and period.
--   Only the cooccurrence field is strict. The five pointer lists hold
--   weighted references ('Pointer') to other groups; their exact roles are
--   taken from the field names.
data PhyloGroup = PhyloGroup
  { _phylo_groupPeriod        :: (Date, Date)
  , _phylo_groupLevel         :: Level
  , _phylo_groupIndex         :: Int
  , _phylo_groupSupport       :: Support
  , _phylo_groupNgrams        :: [Int]
    -- ^ ngrams stored as 'Int' (presumably indices into the foundations roots — confirm)
  , _phylo_groupCooc          :: !Cooc
  , _phylo_groupBranchId      :: PhyloBranchId
  , _phylo_groupLevelParents  :: [Pointer]
  , _phylo_groupLevelChilds   :: [Pointer]
  , _phylo_groupPeriodParents :: [Pointer]
  , _phylo_groupPeriodChilds  :: [Pointer]
  , _phylo_groupGhostPointers :: [Pointer]
  }
  deriving (Eq, Generic, Show)
262
-- | Weight : a generic measure that can be associated with an id
type Weight = Double

-- | Pointer : a weighted pointer to a given 'PhyloGroup'
type Pointer = (PhyloGroupId, Weight)

-- | Link : a weighted pair of group ids
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
270
-- | Filiation : direction of a filiation, towards parents or towards childs.
--   No 'Eq' instance is derived.
data Filiation
  = ToParents
  | ToChilds
  deriving (Generic, Show)

-- | PointerType : kind of pointer, temporal or between levels.
--   No 'Eq' instance is derived.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Generic, Show)
273
274
275 ---------------------------
276 -- | Frequent Item Set | --
277 ---------------------------
278
-- | Clique : a set of ngrams cooccurring in the same 'Document'
type Clique = Set Ngrams

-- | Support : the number of documents in which a 'Clique' occurs
type Support = Int
284
-- | PhyloFis : a frequent item set, i.e. the association between a 'Clique'
--   and its 'Support', attached to a period.
data PhyloFis = PhyloFis
  { _phyloFis_clique  :: Clique
  , _phyloFis_support :: Support
  , _phyloFis_period  :: (Date, Date)
  }
  deriving (Generic, NFData, Show, Eq)
291
292
293 ----------------
294 -- | Lenses | --
295 ----------------
296
-- Template Haskell lens generation, one explicit splice per record type,
-- listed in declaration order. The splices are independent of each other.
--
-- NOTE(review): the fields of Config carry no leading underscore, so with
-- lens' default rules its splice appears to generate no lenses — confirm
-- whether that is intended.

-- Configuration types
$(makeLenses ''Config)
$(makeLenses ''Proximity)
$(makeLenses ''TimeUnit)
$(makeLenses ''ContextualUnit)
$(makeLenses ''PhyloParam)

-- Phylo structure
$(makeLenses ''PhyloFoundations)
$(makeLenses ''Phylo)
$(makeLenses ''PhyloPeriod)
$(makeLenses ''PhyloLevel)
$(makeLenses ''PhyloGroup)

-- Frequent item sets
$(makeLenses ''PhyloFis)
308
309 ------------------------
310 -- | JSON instances | --
311 ------------------------
312
313
-- Template-Haskell-derived ToJSON / FromJSON instances for PhyloFoundations.
-- The aeson options come from `unPrefix "_foundations_"` (presumably it
-- drops that prefix from the field names used as JSON keys — confirm in
-- Gargantext.Core.Utils.Prefix).
deriveJSON (unPrefix "_foundations_") ''PhyloFoundations