2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.AdaptativePhylo where
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
35 import Gargantext.Core.Utils.Prefix (unPrefix)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Context (TermList)
40 import GHC.IO (FilePath)
41 import Control.DeepSeq (NFData)
42 import Control.Lens (makeLenses)
44 import qualified Data.Text.Lazy as TextLazy
53 Wos {_wos_limit :: Int}
54 | Csv {_csv_limit :: Int}
55 deriving (Show,Generic,Eq)
59 { _cons_start :: Double
60 , _cons_step :: Double }
62 { _adap_granularity :: Double }
63 deriving (Show,Generic,Eq)
67 { _wlj_sensibility :: Double
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
76 { _wlj_sensibility :: Double
78 -- , _wlj_thresholdInit :: Double
79 -- , _wlj_thresholdStep :: Double
80 -- | max height for sea level in temporal matching
81 -- , _wlj_elevation :: Double
85 deriving (Show,Generic,Eq)
88 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
90 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
94 { _bpt_threshold :: Double
95 , _bpt_sensibility :: Double
96 , _bpt_scope :: SynchronyScope
97 , _bpt_strategy :: SynchronyStrategy }
98 | ByProximityDistribution
99 { _bpd_sensibility :: Double
100 , _bpd_strategy :: SynchronyStrategy }
101 deriving (Show,Generic,Eq)
106 { _year_period :: Int
108 , _year_matchingFrame :: Int }
109 deriving (Show,Generic,Eq)
111 data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
115 { _fis_support :: Int
119 , _mcl_threshold :: Double
120 , _mcl_filter :: CliqueFilter }
121 deriving (Show,Generic,Eq)
125 Quality { _qua_granularity :: Double
126 , _qua_minBranch :: Int }
127 deriving (Show,Generic,Eq)
131 Config { corpusPath :: FilePath
132 , listPath :: FilePath
133 , outputPath :: FilePath
134 , corpusParser :: CorpusParser
137 , phyloProximity :: Proximity
138 , seaElevation :: SeaElevation
139 , findAncestors :: Bool
140 , phyloSynchrony :: Synchrony
141 , phyloQuality :: Quality
142 , timeUnit :: TimeUnit
144 , exportLabel :: [PhyloLabel]
146 , exportFilter :: [Filter]
147 } deriving (Show,Generic,Eq)
150 defaultConfig :: Config
152 Config { corpusPath = ""
155 , corpusParser = Csv 1000
156 , phyloName = pack "Default Phylo"
158 , phyloProximity = WeightedLogJaccard 10
159 , seaElevation = Constante 0.1 0.1
160 , findAncestors = True
161 , phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups
162 , phyloQuality = Quality 0 1
163 , timeUnit = Year 3 1 5
164 , clique = MaxClique 0 3 ByNeighbours
165 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
166 , exportSort = ByHierarchy
167 , exportFilter = [ByBranchSize 2]
170 instance FromJSON Config
171 instance ToJSON Config
172 instance FromJSON CorpusParser
173 instance ToJSON CorpusParser
174 instance FromJSON Proximity
175 instance ToJSON Proximity
176 instance FromJSON SeaElevation
177 instance ToJSON SeaElevation
178 instance FromJSON TimeUnit
179 instance ToJSON TimeUnit
180 instance FromJSON CliqueFilter
181 instance ToJSON CliqueFilter
182 instance FromJSON Clique
183 instance ToJSON Clique
184 instance FromJSON PhyloLabel
185 instance ToJSON PhyloLabel
186 instance FromJSON Tagger
187 instance ToJSON Tagger
188 instance FromJSON Sort
190 instance FromJSON Order
191 instance ToJSON Order
192 instance FromJSON Filter
193 instance ToJSON Filter
194 instance FromJSON SynchronyScope
195 instance ToJSON SynchronyScope
196 instance FromJSON SynchronyStrategy
197 instance ToJSON SynchronyStrategy
198 instance FromJSON Synchrony
199 instance ToJSON Synchrony
200 instance FromJSON Quality
201 instance ToJSON Quality
204 -- | Software parameters
206 Software { _software_name :: Text
207 , _software_version :: Text
208 } deriving (Generic, Show, Eq)
210 defaultSoftware :: Software
212 Software { _software_name = pack "Gargantext"
213 , _software_version = pack "v4" }
216 -- | Global parameters of a Phylo
218 PhyloParam { _phyloParam_version :: Text
219 , _phyloParam_software :: Software
220 , _phyloParam_config :: Config
221 } deriving (Generic, Show, Eq)
223 defaultPhyloParam :: PhyloParam
225 PhyloParam { _phyloParam_version = pack "v2.adaptative"
226 , _phyloParam_software = defaultSoftware
227 , _phyloParam_config = defaultConfig }
235 -- | Date : a simple Integer
238 -- | Ngrams : a contiguous sequence of n terms
241 -- | Document : a piece of Text linked to a Date
242 data Document = Document
245 } deriving (Eq,Show,Generic,NFData)
253 -- | The Foundations of a Phylo created from a given TermList
254 data PhyloFoundations = PhyloFoundations
255 { _foundations_roots :: !(Vector Ngrams)
256 , _foundations_mapList :: TermList
257 } deriving (Generic, Show, Eq)
260 ---------------------------
261 -- | Coocurency Matrix | --
262 ---------------------------
265 -- | Cooc : a coocurency matrix between two ngrams
266 type Cooc = Map (Int,Int) Double
274 -- | Phylo datatype of a phylomemy
275 -- foundations : the foundations of the phylo
276 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
277 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
278 -- param : the parameters of the phylomemy (with the user's configuration)
279 -- periods : the temporal steps of a phylomemy
281 Phylo { _phylo_foundations :: PhyloFoundations
282 , _phylo_timeCooc :: !(Map Date Cooc)
283 , _phylo_timeDocs :: !(Map Date Double)
284 , _phylo_termFreq :: !(Map Int Double)
285 , _phylo_lastTermFreq :: !(Map Int Double)
286 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
287 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
288 , _phylo_param :: PhyloParam
289 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
291 deriving (Generic, Show, Eq)
294 -- | PhyloPeriodId : the id of a given period
295 type PhyloPeriodId = (Date,Date)
297 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
298 -- id: tuple (start date, end date) of the temporal step of the phylomemy
299 -- levels: levels of granularity
301 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
302 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
303 } deriving (Generic, Show, Eq)
306 -- | Level : a level of clustering
309 -- | PhyloLevelId : the id of a level of clustering in a given period
310 type PhyloLevelId = (PhyloPeriodId,Level)
312 -- | PhyloLevel : levels of phylomemy on a synchronic axis
313 -- Levels description:
314 -- Level 0: The foundations and the base of the phylo
315 -- Level 1: First level of clustering (the Fis)
316 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
318 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
319 , _phylo_levelLevel :: Level
320 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
322 deriving (Generic, Show, Eq)
325 type PhyloGroupId = (PhyloLevelId, Int)
327 -- | BranchId : (a level, a sequence of branch index)
328 -- the sequence is a path of heritage from the most to the less specific branch
329 type PhyloBranchId = (Level, [Int])
331 -- | PhyloGroup : group of ngrams at each level and period
333 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
334 , _phylo_groupLevel :: Level
335 , _phylo_groupIndex :: Int
336 , _phylo_groupLabel :: Text
337 , _phylo_groupSupport :: Support
338 , _phylo_groupNgrams :: [Int]
339 , _phylo_groupCooc :: !(Cooc)
340 , _phylo_groupBranchId :: PhyloBranchId
341 , _phylo_groupMeta :: Map Text [Double]
342 , _phylo_groupLevelParents :: [Pointer]
343 , _phylo_groupLevelChilds :: [Pointer]
344 , _phylo_groupPeriodParents :: [Pointer]
345 , _phylo_groupPeriodChilds :: [Pointer]
346 , _phylo_groupAncestors :: [Pointer]
348 deriving (Generic, Show, Eq, NFData)
350 -- | Weight : A generic mesure that can be associated with an Id
353 -- | Pointer : A weighted pointer to a given PhyloGroup
354 type Pointer = (PhyloGroupId, Weight)
356 data Filiation = ToParents | ToChilds deriving (Generic, Show)
357 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
360 ----------------------
361 -- | Phylo Clique | --
362 ----------------------
364 -- | Support : Number of Documents where a Clique occurs
367 data PhyloClique = PhyloClique
368 { _phyloClique_nodes :: [Int]
369 , _phyloClique_support :: Support
370 , _phyloClique_period :: (Date,Date)
371 } deriving (Generic,NFData,Show,Eq)
377 type DotId = TextLazy.Text
379 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
381 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
383 data Order = Asc | Desc deriving (Show,Generic,Eq)
385 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
387 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
391 { _branch_labelTagger :: Tagger
392 , _branch_labelSize :: Int }
394 { _group_labelTagger :: Tagger
395 , _group_labelSize :: Int }
396 deriving (Show,Generic,Eq)
400 { _branch_id :: PhyloBranchId
401 , _branch_canonId :: [Int]
402 , _branch_seaLevel :: [Double]
403 , _branch_x :: Double
404 , _branch_y :: Double
405 , _branch_w :: Double
406 , _branch_t :: Double
407 , _branch_label :: Text
408 , _branch_meta :: Map Text [Double]
409 } deriving (Generic, Show, Eq)
413 { _export_groups :: [PhyloGroup]
414 , _export_branches :: [PhyloBranch]
415 } deriving (Generic, Show)
422 makeLenses ''Proximity
423 makeLenses ''SeaElevation
426 makeLenses ''PhyloLabel
427 makeLenses ''TimeUnit
428 makeLenses ''PhyloFoundations
429 makeLenses ''PhyloClique
431 makeLenses ''PhyloPeriod
432 makeLenses ''PhyloLevel
433 makeLenses ''PhyloGroup
434 makeLenses ''PhyloParam
435 makeLenses ''PhyloExport
436 makeLenses ''PhyloBranch
438 ------------------------
439 -- | JSON instances | --
440 ------------------------
442 instance FromJSON Phylo
443 instance ToJSON Phylo
444 instance FromJSON PhyloParam
445 instance ToJSON PhyloParam
446 instance FromJSON PhyloPeriod
447 instance ToJSON PhyloPeriod
448 instance FromJSON PhyloLevel
449 instance ToJSON PhyloLevel
450 instance FromJSON Software
451 instance ToJSON Software
452 instance FromJSON PhyloGroup
453 instance ToJSON PhyloGroup
455 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)