2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scales of
granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.AdaptativePhylo where
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
35 import Gargantext.Core.Utils.Prefix (unPrefix)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Context (TermList)
40 import GHC.IO (FilePath)
41 import Control.DeepSeq (NFData)
42 import Control.Lens (makeLenses)
44 import qualified Data.Text.Lazy as TextLazy
53 Wos {_wos_limit :: Int}
54 | Csv {_csv_limit :: Int}
55 | CsvWeighted {_csvw_limit :: Int}
56 deriving (Show,Generic,Eq)
60 { _cons_start :: Double
61 , _cons_step :: Double }
63 { _adap_granularity :: Double }
64 deriving (Show,Generic,Eq)
68 { _wlj_sensibility :: Double
70 -- , _wlj_thresholdInit :: Double
71 -- , _wlj_thresholdStep :: Double
72 -- | max height for sea level in temporal matching
73 -- , _wlj_elevation :: Double
77 { _wlj_sensibility :: Double
79 -- , _wlj_thresholdInit :: Double
80 -- , _wlj_thresholdStep :: Double
81 -- | max height for sea level in temporal matching
82 -- , _wlj_elevation :: Double
86 deriving (Show,Generic,Eq)
-- | Scope selector for the synchronic clustering step.
--
-- It is the type of the @_bpt_scope@ record field, and the default
-- configuration selects 'SiblingBranches'.
--
-- NOTE(review): the constructor names suggest the set of branches taken into
-- account (one branch, its siblings, or every branch) — confirm against the
-- code that consumes this type.
data SynchronyScope
  = SingleBranch
  | SiblingBranches
  | AllBranches
  deriving (Show, Generic, Eq)
-- | Merge strategy for the synchronic clustering step.
--
-- It is the type of the @_bpt_strategy@ and @_bpd_strategy@ record fields, and
-- the default configuration selects 'MergeAllGroups'.
--
-- NOTE(review): the exact difference between "regular" and "all" groups is not
-- visible here — confirm against the synchronic clustering code.
data SynchronyStrategy
  = MergeRegularGroups
  | MergeAllGroups
  deriving (Show, Generic, Eq)
95 { _bpt_threshold :: Double
96 , _bpt_sensibility :: Double
97 , _bpt_scope :: SynchronyScope
98 , _bpt_strategy :: SynchronyStrategy }
99 | ByProximityDistribution
100 { _bpd_sensibility :: Double
101 , _bpd_strategy :: SynchronyStrategy }
102 deriving (Show,Generic,Eq)
107 { _year_period :: Int
109 , _year_matchingFrame :: Int }
110 deriving (Show,Generic,Eq)
-- | Filter selector attached to a clique configuration.
--
-- It is the type of the @_mcl_filter@ record field, and the default
-- configuration selects 'ByNeighbours'.
--
-- NOTE(review): how each constructor filters cliques is decided by the
-- consumer of this type, not by this declaration — confirm there.
data CliqueFilter
  = ByThreshold
  | ByNeighbours
  deriving (Show, Generic, Eq)
116 { _fis_support :: Int
120 , _mcl_threshold :: Double
121 , _mcl_filter :: CliqueFilter }
122 deriving (Show,Generic,Eq)
126 Quality { _qua_granularity :: Double
127 , _qua_minBranch :: Int }
128 deriving (Show,Generic,Eq)
132 Config { corpusPath :: FilePath
133 , listPath :: FilePath
134 , outputPath :: FilePath
135 , corpusParser :: CorpusParser
138 , phyloProximity :: Proximity
139 , seaElevation :: SeaElevation
140 , findAncestors :: Bool
141 , phyloSynchrony :: Synchrony
142 , phyloQuality :: Quality
143 , timeUnit :: TimeUnit
145 , exportLabel :: [PhyloLabel]
147 , exportFilter :: [Filter]
148 } deriving (Show,Generic,Eq)
151 defaultConfig :: Config
153 Config { corpusPath = ""
156 , corpusParser = Csv 1000
157 , phyloName = pack "Default Phylo"
159 , phyloProximity = WeightedLogJaccard 10
160 , seaElevation = Constante 0.1 0.1
161 , findAncestors = True
162 , phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups
163 , phyloQuality = Quality 0 1
164 , timeUnit = Year 3 1 5
165 , clique = MaxClique 0 3 ByNeighbours
166 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
167 , exportSort = ByHierarchy
168 , exportFilter = [ByBranchSize 2]
-- JSON (de)serialisation for the configuration types, listed alphabetically by
-- type name.  Every instance body is empty, so the class default methods are
-- used (presumably aeson's 'Generic'-based defaults; the types derive
-- 'Generic').
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON CliqueFilter
instance ToJSON CliqueFilter
instance FromJSON Config
instance ToJSON Config
instance FromJSON CorpusParser
instance ToJSON CorpusParser
instance FromJSON PhyloLabel
instance ToJSON PhyloLabel
instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON SeaElevation
instance ToJSON SeaElevation
-- NOTE(review): only the 'FromJSON' half for 'Sort' is part of this span;
-- confirm that a matching 'ToJSON Sort' instance is declared.
instance FromJSON Sort
instance FromJSON Tagger
instance ToJSON Tagger
instance FromJSON TimeUnit
instance ToJSON TimeUnit
-- JSON (de)serialisation for the export and synchrony/quality option types,
-- listed alphabetically by type name.  The empty instance bodies fall back on
-- the class default methods (presumably aeson's 'Generic'-based defaults).
instance FromJSON Filter
instance ToJSON Filter
instance FromJSON Order
instance ToJSON Order
instance FromJSON Quality
instance ToJSON Quality
instance FromJSON Synchrony
instance ToJSON Synchrony
instance FromJSON SynchronyScope
instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy
205 -- | Software parameters
207 Software { _software_name :: Text
208 , _software_version :: Text
209 } deriving (Generic, Show, Eq)
211 defaultSoftware :: Software
213 Software { _software_name = pack "Gargantext"
214 , _software_version = pack "v4" }
217 -- | Global parameters of a Phylo
219 PhyloParam { _phyloParam_version :: Text
220 , _phyloParam_software :: Software
221 , _phyloParam_config :: Config
222 } deriving (Generic, Show, Eq)
224 defaultPhyloParam :: PhyloParam
226 PhyloParam { _phyloParam_version = pack "v2.adaptative"
227 , _phyloParam_software = defaultSoftware
228 , _phyloParam_config = defaultConfig }
235 -- | Date : a simple Integer
238 -- | Ngrams : a contiguous sequence of n terms
241 -- | Document : a piece of Text linked to a Date
242 data Document = Document
245 , weight :: Maybe Double
246 } deriving (Eq,Show,Generic,NFData)
-- | The foundations of a Phylo, created from a given 'TermList'.
--
-- The roots are kept in a strict field; the map list is stored lazily.
data PhyloFoundations = PhyloFoundations
  { _foundations_roots   :: !(Vector Ngrams)
    -- ^ Vector of the root ngrams.
  , _foundations_mapList :: TermList
    -- ^ The term list the foundations were built from.
  }
  deriving (Generic, Show, Eq)
-----------------------------
-- | Cooccurrence Matrix | --
-----------------------------
-- | Cooc: a co-occurrence matrix between ngrams, represented as a 'Map' from a
-- pair of 'Int' keys to a 'Double' value.
--
-- NOTE(review): the 'Int' keys are presumably ngram indices — confirm.
type Cooc = Map (Int, Int) Double
275 -- | Phylo datatype of a phylomemy
276 -- foundations : the foundations of the phylo
-- timeCooc : a Map of co-occurrences by minimal unit of time (ex: by year)
-- timeDocs : a Map with the number of docs by minimal unit of time (ex: by year)
279 -- param : the parameters of the phylomemy (with the user's configuration)
280 -- periods : the temporal steps of a phylomemy
282 Phylo { _phylo_foundations :: PhyloFoundations
283 , _phylo_timeCooc :: !(Map Date Cooc)
284 , _phylo_timeDocs :: !(Map Date Double)
285 , _phylo_termFreq :: !(Map Int Double)
286 , _phylo_lastTermFreq :: !(Map Int Double)
287 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
288 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
289 , _phylo_param :: PhyloParam
290 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
292 deriving (Generic, Show, Eq)
-- | PhyloPeriodId: the identifier of a period, given as a pair of 'Date's
-- (start date, end date).
type PhyloPeriodId = (Date, Date)
298 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
299 -- id: tuple (start date, end date) of the temporal step of the phylomemy
300 -- levels: levels of granularity
302 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
303 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
304 } deriving (Generic, Show, Eq)
307 -- | Level : a level of clustering
-- | PhyloLevelId: the identifier of a clustering level inside a given period,
-- i.e. the period identifier paired with the 'Level'.
type PhyloLevelId = (PhyloPeriodId, Level)
313 -- | PhyloLevel : levels of phylomemy on a synchronic axis
314 -- Levels description:
315 -- Level 0: The foundations and the base of the phylo
316 -- Level 1: First level of clustering (the Fis)
317 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
319 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
320 , _phylo_levelLevel :: Level
321 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
323 deriving (Generic, Show, Eq)
-- | PhyloGroupId: the identifier of a 'PhyloGroup', i.e. a level identifier
-- paired with an 'Int'.  It is the key type of the @_phylo_levelGroups@ map.
--
-- NOTE(review): the 'Int' is presumably the group's index within its level —
-- confirm.
type PhyloGroupId = (PhyloLevelId, Int)
-- | PhyloBranchId: a level together with a sequence of branch indices.
--
-- The sequence is a path of heritage, ordered from the most specific to the
-- least specific branch.
type PhyloBranchId = (Level, [Int])
332 -- | PhyloGroup : group of ngrams at each level and period
334 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
335 , _phylo_groupLevel :: Level
336 , _phylo_groupIndex :: Int
337 , _phylo_groupLabel :: Text
338 , _phylo_groupSupport :: Support
339 , _phylo_groupWeight :: Maybe Double
340 , _phylo_groupNgrams :: [Int]
341 , _phylo_groupCooc :: !(Cooc)
342 , _phylo_groupBranchId :: PhyloBranchId
343 , _phylo_groupMeta :: Map Text [Double]
344 , _phylo_groupLevelParents :: [Pointer]
345 , _phylo_groupLevelChilds :: [Pointer]
346 , _phylo_groupPeriodParents :: [Pointer]
347 , _phylo_groupPeriodChilds :: [Pointer]
348 , _phylo_groupAncestors :: [Pointer]
350 deriving (Generic, Show, Eq, NFData)
-- | Weight : A generic measure that can be associated with an Id
-- | Pointer: a weighted reference to a 'PhyloGroup', i.e. the identifier of
-- the target group paired with the 'Weight' of the link.
type Pointer = (PhyloGroupId, Weight)
-- | Direction of a filiation link: towards parents or towards children.
--
-- 'Eq' is derived so that values can be compared with @==@, in line with the
-- other enumeration types of this module, which all derive it.
data Filiation
  = ToParents
  | ToChilds
  deriving (Generic, Show, Eq)
-- | Kind of a pointer between groups.
--
-- NOTE(review): presumably 'TemporalPointer' links groups across periods and
-- 'LevelPointer' links groups across levels — confirm against the callers.
--
-- 'Eq' is derived so that values can be compared with @==@, in line with the
-- other enumeration types of this module, which all derive it.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Generic, Show, Eq)
362 ----------------------
363 -- | Phylo Clique | --
364 ----------------------
366 -- | Support : Number of Documents where a Clique occurs
-- | A clique found within one period.
data PhyloClique = PhyloClique
  { _phyloClique_nodes   :: [Int]
    -- ^ Members of the clique (presumably ngram indices — confirm).
  , _phyloClique_support :: Support
    -- ^ Support of the clique: number of documents where it occurs.
  , _phyloClique_period  :: (Date, Date)
    -- ^ The period the clique belongs to, as a pair of dates.
  , _phyloClique_weight  :: Maybe Double
    -- ^ Optional weight.
  }
  deriving (Generic, NFData, Show, Eq)
-- | DotId: an identifier stored as lazy text.
--
-- NOTE(review): presumably a node identifier in the exported dot graph —
-- confirm against the export code.
type DotId = TextLazy.Text
-- | Kinds of edges used by the export.
--
-- NOTE(review): each constructor name appears to spell out the two endpoint
-- kinds of the edge (source, then target) — confirm against the export code.
data EdgeType
  = GroupToGroup
  | BranchToGroup
  | BranchToBranch
  | GroupToAncestor
  | PeriodToPeriod
  deriving (Show, Generic, Eq)
-- | Export filter.  The default configuration uses @ByBranchSize 2@.
--
-- NOTE(review): presumably branches are kept or dropped by comparing their
-- size with '_branch_size' — confirm the direction of the comparison in the
-- export code.
data Filter = ByBranchSize
  { _branch_size :: Double
  }
  deriving (Show, Generic, Eq)
-- | Sort direction: ascending or descending.  It is the type of the
-- @_sort_order@ record field.
data Order
  = Asc
  | Desc
  deriving (Show, Generic, Eq)
-- | Sort criterion for the export.  The default configuration uses
-- 'ByHierarchy'.
--
-- '_sort_order' exists only on 'ByBirthDate'; applying the selector to
-- 'ByHierarchy' is a runtime error, so prefer pattern matching.
data Sort
  = ByBirthDate { _sort_order :: Order }
  | ByHierarchy
  deriving (Show, Generic, Eq)
-- | Labelling strategy.  It is the type of the @_branch_labelTagger@ and
-- @_group_labelTagger@ record fields; the default configuration tags branches
-- with 'MostEmergentTfIdf' and groups with 'MostEmergentInclusive'.
data Tagger
  = MostInclusive
  | MostEmergentInclusive
  | MostEmergentTfIdf
  deriving (Show, Generic, Eq)
394 { _branch_labelTagger :: Tagger
395 , _branch_labelSize :: Int }
397 { _group_labelTagger :: Tagger
398 , _group_labelSize :: Int }
399 deriving (Show,Generic,Eq)
403 { _branch_id :: PhyloBranchId
404 , _branch_canonId :: [Int]
405 , _branch_seaLevel :: [Double]
406 , _branch_x :: Double
407 , _branch_y :: Double
408 , _branch_w :: Double
409 , _branch_t :: Double
410 , _branch_label :: Text
411 , _branch_meta :: Map Text [Double]
412 } deriving (Generic, Show, Eq)
416 { _export_groups :: [PhyloGroup]
417 , _export_branches :: [PhyloBranch]
418 } deriving (Generic, Show)
-- Template Haskell lens generation, one splice per record type, listed
-- alphabetically by type name.  Each splice is independent of the others.
makeLenses ''PhyloBranch
makeLenses ''PhyloClique
makeLenses ''PhyloExport
makeLenses ''PhyloFoundations
makeLenses ''PhyloGroup
makeLenses ''PhyloLabel
makeLenses ''PhyloLevel
makeLenses ''PhyloParam
makeLenses ''PhyloPeriod
makeLenses ''Proximity
makeLenses ''SeaElevation
makeLenses ''TimeUnit
441 ------------------------
442 -- | JSON instances | --
443 ------------------------
-- JSON (de)serialisation for the phylo structure types, listed alphabetically
-- by type name.  The empty instance bodies fall back on the class default
-- methods (presumably aeson's 'Generic'-based defaults).
instance FromJSON Phylo
instance ToJSON Phylo
instance FromJSON PhyloGroup
instance ToJSON PhyloGroup
instance FromJSON PhyloLevel
instance ToJSON PhyloLevel
instance FromJSON PhyloParam
instance ToJSON PhyloParam
instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod
instance FromJSON Software
instance ToJSON Software
-- JSON instances for 'PhyloFoundations' are generated with Template Haskell,
-- using the options built by 'unPrefix' for the "_foundations_" prefix
-- (presumably stripping that prefix from the field names — confirm in
-- Gargantext.Core.Utils.Prefix).  Written as a bare top-level splice, like
-- the 'makeLenses' declarations of this module.
deriveJSON (unPrefix "_foundations_") ''PhyloFoundations