2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.AdaptativePhylo where
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
35 import Gargantext.Core.Utils.Prefix (unPrefix)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Context (TermList)
40 import GHC.IO (FilePath)
41 import Control.DeepSeq (NFData)
42 import Control.Lens (makeLenses)
44 import qualified Data.Text.Lazy as TextLazy
53 Wos {_wos_limit :: Int}
54 | Csv {_csv_limit :: Int}
55 deriving (Show,Generic,Eq)
59 { _cons_start :: Double
60 , _cons_step :: Double }
62 { _adap_granularity :: Double }
63 deriving (Show,Generic,Eq)
67 { _wlj_sensibility :: Double
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
76 { _wlj_sensibility :: Double
78 -- , _wlj_thresholdInit :: Double
79 -- , _wlj_thresholdStep :: Double
80 -- | max height for sea level in temporal matching
81 -- , _wlj_elevation :: Double
85 deriving (Show,Generic,Eq)
88 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
90 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
94 { _bpt_threshold :: Double
95 , _bpt_sensibility :: Double
96 , _bpt_scope :: SynchronyScope
97 , _bpt_strategy :: SynchronyStrategy }
98 | ByProximityDistribution
99 { _bpd_sensibility :: Double
100 , _bpd_strategy :: SynchronyStrategy }
101 deriving (Show,Generic,Eq)
106 { _year_period :: Int
108 , _year_matchingFrame :: Int }
109 deriving (Show,Generic,Eq)
114 { _fis_support :: Int
118 deriving (Show,Generic,Eq)
122 Quality { _qua_granularity :: Double
123 , _qua_minBranch :: Int }
124 deriving (Show,Generic,Eq)
128 Config { corpusPath :: FilePath
129 , listPath :: FilePath
130 , outputPath :: FilePath
131 , corpusParser :: CorpusParser
134 , phyloProximity :: Proximity
135 , seaElevation :: SeaElevation
136 , findAncestors :: Bool
137 , phyloSynchrony :: Synchrony
138 , phyloQuality :: Quality
139 , timeUnit :: TimeUnit
141 , exportLabel :: [PhyloLabel]
143 , exportFilter :: [Filter]
144 } deriving (Show,Generic,Eq)
147 defaultConfig :: Config
149 Config { corpusPath = ""
152 , corpusParser = Csv 1000
153 , phyloName = pack "Default Phylo"
155 , phyloProximity = WeightedLogJaccard 10
156 , seaElevation = Constante 0.1 0.1
157 , findAncestors = True
158 , phyloSynchrony = ByProximityThreshold 0.2 10 SiblingBranches MergeAllGroups
159 , phyloQuality = Quality 0 1
160 , timeUnit = Year 3 1 5
161 , clique = MaxClique 0
162 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
163 , exportSort = ByHierarchy
164 , exportFilter = [ByBranchSize 2]
167 instance FromJSON Config
168 instance ToJSON Config
169 instance FromJSON CorpusParser
170 instance ToJSON CorpusParser
171 instance FromJSON Proximity
172 instance ToJSON Proximity
173 instance FromJSON SeaElevation
174 instance ToJSON SeaElevation
175 instance FromJSON TimeUnit
176 instance ToJSON TimeUnit
177 instance FromJSON Clique
178 instance ToJSON Clique
179 instance FromJSON PhyloLabel
180 instance ToJSON PhyloLabel
181 instance FromJSON Tagger
182 instance ToJSON Tagger
183 instance FromJSON Sort
185 instance FromJSON Order
186 instance ToJSON Order
187 instance FromJSON Filter
188 instance ToJSON Filter
189 instance FromJSON SynchronyScope
190 instance ToJSON SynchronyScope
191 instance FromJSON SynchronyStrategy
192 instance ToJSON SynchronyStrategy
193 instance FromJSON Synchrony
194 instance ToJSON Synchrony
195 instance FromJSON Quality
196 instance ToJSON Quality
199 -- | Software parameters
201 Software { _software_name :: Text
202 , _software_version :: Text
203 } deriving (Generic, Show, Eq)
205 defaultSoftware :: Software
207 Software { _software_name = pack "Gargantext"
208 , _software_version = pack "v4" }
211 -- | Global parameters of a Phylo
213 PhyloParam { _phyloParam_version :: Text
214 , _phyloParam_software :: Software
215 , _phyloParam_config :: Config
216 } deriving (Generic, Show, Eq)
218 defaultPhyloParam :: PhyloParam
220 PhyloParam { _phyloParam_version = pack "v2.adaptative"
221 , _phyloParam_software = defaultSoftware
222 , _phyloParam_config = defaultConfig }
230 -- | Date : a simple Integer
233 -- | Ngrams : a contiguous sequence of n terms
236 -- | Document : a piece of Text linked to a Date
237 data Document = Document
240 } deriving (Eq,Show,Generic,NFData)
248 -- | The Foundations of a Phylo created from a given TermList
249 data PhyloFoundations = PhyloFoundations
250 { _foundations_roots :: !(Vector Ngrams)
251 , _foundations_mapList :: TermList
252 } deriving (Generic, Show, Eq)
255 ---------------------------
256 -- | Coocurency Matrix | --
257 ---------------------------
260 -- | Cooc : a coocurency matrix between two ngrams
261 type Cooc = Map (Int,Int) Double
269 -- | Phylo datatype of a phylomemy
270 -- foundations : the foundations of the phylo
271 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
272 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
273 -- param : the parameters of the phylomemy (with the user's configuration)
274 -- periods : the temporal steps of a phylomemy
276 Phylo { _phylo_foundations :: PhyloFoundations
277 , _phylo_timeCooc :: !(Map Date Cooc)
278 , _phylo_timeDocs :: !(Map Date Double)
279 , _phylo_termFreq :: !(Map Int Double)
280 , _phylo_lastTermFreq :: !(Map Int Double)
281 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
282 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
283 , _phylo_param :: PhyloParam
284 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
286 deriving (Generic, Show, Eq)
289 -- | PhyloPeriodId : the id of a given period
290 type PhyloPeriodId = (Date,Date)
292 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
293 -- id: tuple (start date, end date) of the temporal step of the phylomemy
294 -- levels: levels of granularity
296 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
297 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
298 } deriving (Generic, Show, Eq)
301 -- | Level : a level of clustering
304 -- | PhyloLevelId : the id of a level of clustering in a given period
305 type PhyloLevelId = (PhyloPeriodId,Level)
307 -- | PhyloLevel : levels of phylomemy on a synchronic axis
308 -- Levels description:
309 -- Level 0: The foundations and the base of the phylo
310 -- Level 1: First level of clustering (the Fis)
311 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
313 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
314 , _phylo_levelLevel :: Level
315 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
317 deriving (Generic, Show, Eq)
320 type PhyloGroupId = (PhyloLevelId, Int)
322 -- | BranchId : (a level, a sequence of branch index)
323 -- the sequence is a path of heritage from the most to the less specific branch
324 type PhyloBranchId = (Level, [Int])
326 -- | PhyloGroup : group of ngrams at each level and period
328 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
329 , _phylo_groupLevel :: Level
330 , _phylo_groupIndex :: Int
331 , _phylo_groupLabel :: Text
332 , _phylo_groupSupport :: Support
333 , _phylo_groupNgrams :: [Int]
334 , _phylo_groupCooc :: !(Cooc)
335 , _phylo_groupBranchId :: PhyloBranchId
336 , _phylo_groupMeta :: Map Text [Double]
337 , _phylo_groupLevelParents :: [Pointer]
338 , _phylo_groupLevelChilds :: [Pointer]
339 , _phylo_groupPeriodParents :: [Pointer]
340 , _phylo_groupPeriodChilds :: [Pointer]
341 , _phylo_groupAncestors :: [Pointer]
343 deriving (Generic, Show, Eq, NFData)
345 -- | Weight : A generic mesure that can be associated with an Id
348 -- | Pointer : A weighted pointer to a given PhyloGroup
349 type Pointer = (PhyloGroupId, Weight)
351 data Filiation = ToParents | ToChilds deriving (Generic, Show)
352 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
355 ----------------------
356 -- | Phylo Clique | --
357 ----------------------
359 -- | Support : Number of Documents where a Clique occurs
362 data PhyloClique = PhyloClique
363 { _phyloClique_nodes :: [Int]
364 , _phyloClique_support :: Support
365 , _phyloClique_period :: (Date,Date)
366 } deriving (Generic,NFData,Show,Eq)
372 type DotId = TextLazy.Text
374 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
376 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
378 data Order = Asc | Desc deriving (Show,Generic,Eq)
380 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
382 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
386 { _branch_labelTagger :: Tagger
387 , _branch_labelSize :: Int }
389 { _group_labelTagger :: Tagger
390 , _group_labelSize :: Int }
391 deriving (Show,Generic,Eq)
395 { _branch_id :: PhyloBranchId
396 , _branch_canonId :: [Int]
397 , _branch_seaLevel :: [Double]
398 , _branch_x :: Double
399 , _branch_y :: Double
400 , _branch_w :: Double
401 , _branch_t :: Double
402 , _branch_label :: Text
403 , _branch_meta :: Map Text [Double]
404 } deriving (Generic, Show, Eq)
408 { _export_groups :: [PhyloGroup]
409 , _export_branches :: [PhyloBranch]
410 } deriving (Generic, Show)
417 makeLenses ''Proximity
418 makeLenses ''SeaElevation
421 makeLenses ''PhyloLabel
422 makeLenses ''TimeUnit
423 makeLenses ''PhyloFoundations
424 makeLenses ''PhyloClique
426 makeLenses ''PhyloPeriod
427 makeLenses ''PhyloLevel
428 makeLenses ''PhyloGroup
429 makeLenses ''PhyloParam
430 makeLenses ''PhyloExport
431 makeLenses ''PhyloBranch
433 ------------------------
434 -- | JSON instances | --
435 ------------------------
438 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)