2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.AdaptativePhylo where
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
35 import Gargantext.Core.Utils.Prefix (unPrefix)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Context (TermList)
40 import GHC.IO (FilePath)
41 import Control.DeepSeq (NFData)
42 import Control.Lens (makeLenses)
44 import qualified Data.Text.Lazy as TextLazy
53 Wos {_wos_limit :: Int}
54 | Csv {_csv_limit :: Int}
55 deriving (Show,Generic,Eq)
59 { _cons_start :: Double
60 , _cons_step :: Double }
62 { _adap_granularity :: Double }
63 deriving (Show,Generic,Eq)
67 { _wlj_sensibility :: Double
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
76 deriving (Show,Generic,Eq)
79 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
81 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
85 { _bpt_threshold :: Double
86 , _bpt_sensibility :: Double
87 , _bpt_scope :: SynchronyScope
88 , _bpt_strategy :: SynchronyStrategy }
89 | ByProximityDistribution
90 { _bpd_sensibility :: Double
91 , _bpd_strategy :: SynchronyStrategy }
92 deriving (Show,Generic,Eq)
99 , _year_matchingFrame :: Int }
100 deriving (Show,Generic,Eq)
105 { _fis_support :: Int
109 deriving (Show,Generic,Eq)
113 Quality { _qua_granularity :: Double
114 , _qua_minBranch :: Int }
115 deriving (Show,Generic,Eq)
119 Config { corpusPath :: FilePath
120 , listPath :: FilePath
121 , outputPath :: FilePath
122 , corpusParser :: CorpusParser
125 , phyloProximity :: Proximity
126 , seaElevation :: SeaElevation
127 , phyloSynchrony :: Synchrony
128 , phyloQuality :: Quality
129 , timeUnit :: TimeUnit
131 , exportLabel :: [PhyloLabel]
133 , exportFilter :: [Filter]
134 } deriving (Show,Generic,Eq)
137 defaultConfig :: Config
139 Config { corpusPath = ""
142 , corpusParser = Csv 1000
143 , phyloName = pack "Default Phylo"
145 , phyloProximity = WeightedLogJaccard 10
146 , seaElevation = Constante 0.1 0.1
147 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
148 , phyloQuality = Quality 100 1
149 , timeUnit = Year 3 1 5
150 , clique = MaxClique 0
151 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
152 , exportSort = ByHierarchy
153 , exportFilter = [ByBranchSize 2]
156 instance FromJSON Config
157 instance ToJSON Config
158 instance FromJSON CorpusParser
159 instance ToJSON CorpusParser
160 instance FromJSON Proximity
161 instance ToJSON Proximity
162 instance FromJSON SeaElevation
163 instance ToJSON SeaElevation
164 instance FromJSON TimeUnit
165 instance ToJSON TimeUnit
166 instance FromJSON Clique
167 instance ToJSON Clique
168 instance FromJSON PhyloLabel
169 instance ToJSON PhyloLabel
170 instance FromJSON Tagger
171 instance ToJSON Tagger
172 instance FromJSON Sort
174 instance FromJSON Order
175 instance ToJSON Order
176 instance FromJSON Filter
177 instance ToJSON Filter
178 instance FromJSON SynchronyScope
179 instance ToJSON SynchronyScope
180 instance FromJSON SynchronyStrategy
181 instance ToJSON SynchronyStrategy
182 instance FromJSON Synchrony
183 instance ToJSON Synchrony
184 instance FromJSON Quality
185 instance ToJSON Quality
188 -- | Software parameters
190 Software { _software_name :: Text
191 , _software_version :: Text
192 } deriving (Generic, Show, Eq)
194 defaultSoftware :: Software
196 Software { _software_name = pack "Gargantext"
197 , _software_version = pack "v4" }
200 -- | Global parameters of a Phylo
202 PhyloParam { _phyloParam_version :: Text
203 , _phyloParam_software :: Software
204 , _phyloParam_config :: Config
205 } deriving (Generic, Show, Eq)
207 defaultPhyloParam :: PhyloParam
209 PhyloParam { _phyloParam_version = pack "v2.adaptative"
210 , _phyloParam_software = defaultSoftware
211 , _phyloParam_config = defaultConfig }
219 -- | Date : a simple Integer
222 -- | Ngrams : a contiguous sequence of n terms
225 -- | Document : a piece of Text linked to a Date
226 data Document = Document
229 } deriving (Eq,Show,Generic,NFData)
237 -- | The Foundations of a Phylo created from a given TermList
238 data PhyloFoundations = PhyloFoundations
239 { _foundations_roots :: !(Vector Ngrams)
240 , _foundations_mapList :: TermList
241 } deriving (Generic, Show, Eq)
244 ---------------------------
245 -- | Coocurency Matrix | --
246 ---------------------------
249 -- | Cooc : a coocurency matrix between two ngrams
250 type Cooc = Map (Int,Int) Double
258 -- | Phylo datatype of a phylomemy
259 -- foundations : the foundations of the phylo
260 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
261 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
262 -- param : the parameters of the phylomemy (with the user's configuration)
263 -- periods : the temporal steps of a phylomemy
265 Phylo { _phylo_foundations :: PhyloFoundations
266 , _phylo_timeCooc :: !(Map Date Cooc)
267 , _phylo_timeDocs :: !(Map Date Double)
268 , _phylo_termFreq :: !(Map Int Double)
269 , _phylo_lastTermFreq :: !(Map Int Double)
270 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
271 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
272 , _phylo_param :: PhyloParam
273 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
275 deriving (Generic, Show, Eq)
278 -- | PhyloPeriodId : the id of a given period
279 type PhyloPeriodId = (Date,Date)
281 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
282 -- id: tuple (start date, end date) of the temporal step of the phylomemy
283 -- levels: levels of granularity
285 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
286 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
287 } deriving (Generic, Show, Eq)
290 -- | Level : a level of clustering
293 -- | PhyloLevelId : the id of a level of clustering in a given period
294 type PhyloLevelId = (PhyloPeriodId,Level)
296 -- | PhyloLevel : levels of phylomemy on a synchronic axis
297 -- Levels description:
298 -- Level 0: The foundations and the base of the phylo
299 -- Level 1: First level of clustering (the Fis)
300 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
302 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
303 , _phylo_levelLevel :: Level
304 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
306 deriving (Generic, Show, Eq)
309 type PhyloGroupId = (PhyloLevelId, Int)
311 -- | BranchId : (a level, a sequence of branch index)
312 -- the sequence is a path of heritage from the most to the less specific branch
313 type PhyloBranchId = (Level, [Int])
315 -- | PhyloGroup : group of ngrams at each level and period
317 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
318 , _phylo_groupLevel :: Level
319 , _phylo_groupIndex :: Int
320 , _phylo_groupLabel :: Text
321 , _phylo_groupSupport :: Support
322 , _phylo_groupNgrams :: [Int]
323 , _phylo_groupCooc :: !(Cooc)
324 , _phylo_groupBranchId :: PhyloBranchId
325 , _phylo_groupMeta :: Map Text [Double]
326 , _phylo_groupLevelParents :: [Pointer]
327 , _phylo_groupLevelChilds :: [Pointer]
328 , _phylo_groupPeriodParents :: [Pointer]
329 , _phylo_groupPeriodChilds :: [Pointer]
330 , _phylo_groupAncestors :: [Pointer]
332 deriving (Generic, Show, Eq, NFData)
334 -- | Weight : A generic mesure that can be associated with an Id
337 -- | Pointer : A weighted pointer to a given PhyloGroup
338 type Pointer = (PhyloGroupId, Weight)
340 data Filiation = ToParents | ToChilds deriving (Generic, Show)
341 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
344 ----------------------
345 -- | Phylo Clique | --
346 ----------------------
348 -- | Support : Number of Documents where a Clique occurs
351 data PhyloClique = PhyloClique
352 { _phyloClique_nodes :: [Int]
353 , _phyloClique_support :: Support
354 , _phyloClique_period :: (Date,Date)
355 } deriving (Generic,NFData,Show,Eq)
361 type DotId = TextLazy.Text
363 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
365 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
367 data Order = Asc | Desc deriving (Show,Generic,Eq)
369 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
371 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
375 { _branch_labelTagger :: Tagger
376 , _branch_labelSize :: Int }
378 { _group_labelTagger :: Tagger
379 , _group_labelSize :: Int }
380 deriving (Show,Generic,Eq)
384 { _branch_id :: PhyloBranchId
385 , _branch_canonId :: [Int]
386 , _branch_seaLevel :: [Double]
387 , _branch_x :: Double
388 , _branch_y :: Double
389 , _branch_w :: Double
390 , _branch_t :: Double
391 , _branch_label :: Text
392 , _branch_meta :: Map Text [Double]
393 } deriving (Generic, Show, Eq)
397 { _export_groups :: [PhyloGroup]
398 , _export_branches :: [PhyloBranch]
399 } deriving (Generic, Show)
406 makeLenses ''Proximity
407 makeLenses ''SeaElevation
410 makeLenses ''PhyloLabel
411 makeLenses ''TimeUnit
412 makeLenses ''PhyloFoundations
413 makeLenses ''PhyloClique
415 makeLenses ''PhyloPeriod
416 makeLenses ''PhyloLevel
417 makeLenses ''PhyloGroup
418 makeLenses ''PhyloParam
419 makeLenses ''PhyloExport
420 makeLenses ''PhyloBranch
422 ------------------------
423 -- | JSON instances | --
424 ------------------------
427 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)