]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/AdaptativePhylo.hs
color update
[gargantext.git] / src / Gargantext / Viz / AdaptativePhylo.hs
1 {-|
2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22 -}
23
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
28
29 module Gargantext.Viz.AdaptativePhylo where
30
31 import Data.Aeson
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
35 import Data.Map (Map)
36 import Data.Set (Set)
37
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
41
42 import GHC.Generics
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
46
47
48 ----------------
49 -- | Config | --
50 ----------------
51
52
-- | Which corpus parser a 'Config' selects ('corpusParser' field).
--
-- NOTE(review): presumably Web of Science exports vs. CSV files; confirm
-- against the code that interprets this value.
data CorpusParser
  = Wos
  | Csv
  deriving (Eq, Show, Generic)
54
-- | Proximity measure selected by a 'Config' ('phyloProximity' field).
--
-- NOTE: '_sensibility' is a partial record selector: it exists only on
-- 'WeightedLogJaccard', so applying it to 'Hamming' fails at runtime.
-- Pattern match on the constructor when the value may be 'Hamming'.
data Proximity
  = WeightedLogJaccard
      { _sensibility :: Double
      }
  | Hamming
  deriving (Eq, Show, Generic)
58
-- | Settings of a phylo; embedded in 'PhyloParam' and (de)serialised through
-- the 'FromJSON' / 'ToJSON' instances declared for it in this module.
--
-- NOTE(review): the grouping comments below are read off the field names
-- only; confirm the exact meaning of each field against its consumers.
data Config = Config
  { -- file locations
    corpusPath     :: FilePath
  , listPath       :: FilePath
  , outputPath     :: FilePath
    -- corpus handling
  , corpusParser   :: CorpusParser
  , corpusLimit    :: Int
    -- phylo identity and shape
  , phyloName      :: Text
  , phyloLevel     :: Int
  , phyloProximity :: Proximity
    -- temporal settings
  , timeUnit       :: Int
  , maxTimeMatch   :: Int
  , timePeriod     :: Int
  , timeStep       :: Int
    -- frequent item set settings
  , fisSupport     :: Int
  , fisSize        :: Int
    -- branch settings
  , branchSize     :: Int
  }
  deriving (Eq, Show, Generic)
76
-- | Baseline 'Config': empty paths, the 'Csv' parser, a
-- 'WeightedLogJaccard' proximity with sensibility 10, and the numeric
-- defaults listed below.
defaultConfig :: Config
defaultConfig = Config
  { corpusPath     = ""
  , listPath       = ""
  , outputPath     = ""
  , corpusParser   = Csv
  , corpusLimit    = 1000
  , phyloName      = pack "Default Phylo"
  , phyloLevel     = 2
  , phyloProximity = WeightedLogJaccard 10
  , timeUnit       = 1
  , maxTimeMatch   = 5
  , timePeriod     = 3
  , timeStep       = 1
  , fisSupport     = 2
  , fisSize        = 4
  , branchSize     = 3
  }
95
-- JSON codecs for the configuration types. The instance bodies are empty, so
-- the class default methods are used (all three types derive 'Generic').
instance ToJSON CorpusParser
instance FromJSON CorpusParser

instance ToJSON Proximity
instance FromJSON Proximity

instance ToJSON Config
instance FromJSON Config
102
103
-- | Software parameters: the name and version of the producing software.
data Software = Software
  { _software_name    :: Text
  , _software_version :: Text
  }
  deriving (Eq, Show, Generic)
109
-- | The default 'Software' record: name "Gargantext", version "v4".
defaultSoftware :: Software
defaultSoftware = Software
  { _software_name    = pack "Gargantext"
  , _software_version = pack "v4"
  }
114
115
-- | Global parameters of a 'Phylo': a version tag, the producing 'Software'
-- and the user 'Config'.
data PhyloParam = PhyloParam
  { _phyloParam_version  :: Text
  , _phyloParam_software :: Software
  , _phyloParam_config   :: Config
  }
  deriving (Eq, Show, Generic)
122
-- | Default 'PhyloParam': version "v2.adaptative" combined with
-- 'defaultSoftware' and 'defaultConfig'.
defaultPhyloParam :: PhyloParam
defaultPhyloParam = PhyloParam
  { _phyloParam_version  = pack "v2.adaptative"
  , _phyloParam_software = defaultSoftware
  , _phyloParam_config   = defaultConfig
  }
128
129
130 ------------------
131 -- | Document | --
132 ------------------
133
134
-- | Date : a plain, fixed-width 'Int' (not an arbitrary-precision 'Integer').
type Date = Int
137
-- | Ngrams : a contiguous sequence of n terms, stored as a single 'Text'.
type Ngrams = Text
140
-- | Document : a piece of text, held as a list of 'Ngrams', linked to a 'Date'.
data Document = Document
  { date :: Date
  , text :: [Ngrams]
  }
  deriving (Eq, Show, Generic, NFData)
146
147
148 --------------------
149 -- | Foundation | --
150 --------------------
151
152
-- | The foundations of a 'Phylo', created from a given 'TermList'.
--
-- The roots vector is a strict field; the term list is kept lazy.
data PhyloFoundations = PhyloFoundations
  { _foundations_roots   :: !(Vector Ngrams)
  , _foundations_mapList :: TermList
  }
  deriving (Eq, Show, Generic)
158
159
-----------------------------
-- | Cooccurrence Matrix | --
-----------------------------


-- | Cooc : a co-occurrence matrix, mapping a pair of 'Int's (one per ngram)
-- to a 'Double'.
type Cooc = Map (Int, Int) Double
167
168
169 -------------------
170 -- | Phylomemy | --
171 -------------------
172
173
-- | Phylo : the datatype of a phylomemy.
data Phylo = Phylo
  { _phylo_foundations :: PhyloFoundations
    -- ^ the foundations of the phylo
  , _phylo_timeCooc    :: !(Map Date Cooc)
    -- ^ a Map of co-occurrences by minimal unit of time (ex: by year)
  , _phylo_timeDocs    :: !(Map Date Double)
    -- ^ a Map with the number of docs by minimal unit of time (ex: by year)
  , _phylo_param       :: PhyloParam
    -- ^ the parameters of the phylomemy (with the user's configuration)
  , _phylo_periods     :: Map PhyloPeriodId PhyloPeriod
    -- ^ the temporal steps of the phylomemy
  }
  deriving (Eq, Show, Generic)
188
189
-- | PhyloPeriodId : the id of a given period, as a pair of 'Date's.
type PhyloPeriodId = (Date, Date)
192
-- | PhyloPeriod : one step of a phylomemy on the temporal axis.
data PhyloPeriod = PhyloPeriod
  { _phylo_periodPeriod :: (Date, Date)
    -- ^ id: tuple (start date, end date) of the temporal step
  , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
    -- ^ levels of granularity found in this period
  }
  deriving (Eq, Show, Generic)
200
201
-- | Level : a level of clustering, numbered with an 'Int'.
type Level = Int
204
-- | PhyloLevelId : the id of a level of clustering in a given period
-- (the period id paired with the 'Level').
type PhyloLevelId = (PhyloPeriodId, Level)
207
-- | PhyloLevel : a level of the phylomemy on the synchronic axis.
--
-- Levels description:
--
--   * Level 0: the foundations and the base of the phylo
--   * Level 1: first level of clustering (the Fis)
--   * Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel = PhyloLevel
  { _phylo_levelPeriod :: (Date, Date)
  , _phylo_levelLevel  :: Level
  , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
  }
  deriving (Eq, Show, Generic)
219
220
-- | PhyloGroupId : a 'PhyloLevelId' paired with an 'Int'.
--
-- NOTE(review): the 'Int' is presumably the group's index within its level
-- (cf. '_phylo_groupIndex'); confirm at the construction sites.
type PhyloGroupId = (PhyloLevelId, Int)
222
-- | PhyloBranchId : (a level, a sequence of branch indexes).
-- The sequence is a path of heritage from the most specific to the least
-- specific branch.
type PhyloBranchId = (Level, [Int])
226
-- | PhyloGroup : a group of ngrams at a given level and period.
--
-- The ngrams are stored as 'Int's, and the co-occurrence matrix is the only
-- strict field. The four parent/child fields and the break pointer hold
-- 'Pointer's, i.e. weighted references to other groups.
data PhyloGroup = PhyloGroup
  { _phylo_groupPeriod        :: (Date, Date)
  , _phylo_groupLevel         :: Level
  , _phylo_groupIndex         :: Int
  , _phylo_groupSupport       :: Support
  , _phylo_groupNgrams        :: [Int]
  , _phylo_groupCooc          :: !Cooc
  , _phylo_groupBranchId      :: PhyloBranchId
    -- links across levels
  , _phylo_groupLevelParents  :: [Pointer]
  , _phylo_groupLevelChilds   :: [Pointer]
    -- links across periods
  , _phylo_groupPeriodParents :: [Pointer]
  , _phylo_groupPeriodChilds  :: [Pointer]
  , _phylo_groupBreakPointer  :: Maybe Pointer
  }
  deriving (Eq, Show, Generic)
243
-- | Weight : a generic measure that can be associated with an id.
type Weight = Double
246
-- | Pointer : a weighted pointer to a given 'PhyloGroup', i.e. the target
-- group's id together with a 'Weight'.
type Pointer = (PhyloGroupId, Weight)
249
-- | Link : a pair of group ids together with a 'Weight'.
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
251
-- | Filiation : a direction tag, towards parents or towards childs.
data Filiation
  = ToParents
  | ToChilds
  deriving (Show, Generic)
-- | PointerType : tags a pointer as temporal or level-based.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Show, Generic)
254
255
256 ---------------------------
257 -- | Frequent Item Set | --
258 ---------------------------
259
-- | Clique : a 'Set' of ngrams co-occurring in the same 'Document'.
type Clique = Set Ngrams
262
-- | Support : the number of documents in which a 'Clique' occurs.
type Support = Int
265
-- | Fis : Frequent Item Set, i.e. the association between a 'Clique' and a
-- 'Support', tagged with the period (pair of 'Date's) it belongs to.
data PhyloFis = PhyloFis
  { _phyloFis_clique  :: Clique
  , _phyloFis_support :: Support
  , _phyloFis_period  :: (Date, Date)
  }
  deriving (Eq, Show, Generic, NFData)
272
273
274 ----------------
275 -- | Lenses | --
276 ----------------
277
-- Lens generation, listed in the order the types are declared above.
--
-- NOTE(review): the fields of 'Config' carry no leading underscore, so with
-- the default 'makeLenses' rules this first splice presumably generates
-- nothing; confirm against the lens documentation before removing it.
makeLenses ''Config
makeLenses ''PhyloParam
makeLenses ''PhyloFoundations
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
makeLenses ''PhyloGroup
makeLenses ''PhyloFis
286
287 ------------------------
288 -- | JSON instances | --
289 ------------------------
290
291
-- Template Haskell derivation of the JSON instances of 'PhyloFoundations',
-- with the options returned by @unPrefix "_foundations_"@.
-- NOTE(review): presumably this strips the "_foundations_" prefix from the
-- field names in the JSON output; confirm in "Gargantext.Core.Utils.Prefix".
deriveJSON (unPrefix "_foundations_") ''PhyloFoundations