2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.AdaptativePhylo where
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
35 import Gargantext.Core.Utils.Prefix (unPrefix)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Context (TermList)
40 import GHC.IO (FilePath)
41 import Control.DeepSeq (NFData)
42 import Control.Lens (makeLenses)
44 import qualified Data.Text.Lazy as TextLazy
53 Wos {_wos_limit :: Int}
54 | Csv {_csv_limit :: Int}
55 deriving (Show,Generic,Eq)
59 { _cons_start :: Double
60 , _cons_step :: Double }
62 { _adap_granularity :: Double }
63 deriving (Show,Generic,Eq)
67 { _wlj_sensibility :: Double
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
76 deriving (Show,Generic,Eq)
-- | SynchronyScope : the scope carried by the '_bpt_scope' field of the
-- synchrony configuration; one of a single branch, sibling branches or all branches.
-- NOTE(review): how each scope is interpreted is decided by code outside this module — confirm.
79 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
-- | SynchronyStrategy : the strategy carried by the '_bpt_strategy' and
-- '_bpd_strategy' fields of the synchrony configuration.
-- NOTE(review): presumably selects which groups get merged (regular ones only, or all) — confirm against the consumer.
81 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
85 { _bpt_threshold :: Double
86 , _bpt_sensibility :: Double
87 , _bpt_scope :: SynchronyScope
88 , _bpt_strategy :: SynchronyStrategy }
89 | ByProximityDistribution
90 { _bpd_sensibility :: Double
91 , _bpd_strategy :: SynchronyStrategy }
92 deriving (Show,Generic,Eq)
99 , _year_matchingFrame :: Int }
100 deriving (Show,Generic,Eq)
105 { _fis_support :: Int
109 deriving (Show,Generic,Eq)
113 Quality { _qua_granularity :: Double
114 , _qua_minBranch :: Int }
115 deriving (Show,Generic,Eq)
119 Config { corpusPath :: FilePath
120 , listPath :: FilePath
121 , outputPath :: FilePath
122 , corpusParser :: CorpusParser
125 , phyloProximity :: Proximity
126 , seaElevation :: SeaElevation
127 , findAncestors :: Bool
128 , phyloSynchrony :: Synchrony
129 , phyloQuality :: Quality
130 , timeUnit :: TimeUnit
132 , exportLabel :: [PhyloLabel]
134 , exportFilter :: [Filter]
135 } deriving (Show,Generic,Eq)
138 defaultConfig :: Config
140 Config { corpusPath = ""
143 , corpusParser = Csv 1000
144 , phyloName = pack "Default Phylo"
146 , phyloProximity = WeightedLogJaccard 10
147 , seaElevation = Constante 0.1 0.1
148 , findAncestors = True
149 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
150 , phyloQuality = Quality 100 1
151 , timeUnit = Year 3 1 5
152 , clique = MaxClique 0
153 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
154 , exportSort = ByHierarchy
155 , exportFilter = [ByBranchSize 2]
-- JSON instances for the configuration types.
-- Every instance below has an empty body, so the class default methods are used.
-- NOTE(review): presumably the Generic-based defaults of aeson (the types derive Generic) — confirm.
158 instance FromJSON Config
159 instance ToJSON Config
160 instance FromJSON CorpusParser
161 instance ToJSON CorpusParser
162 instance FromJSON Proximity
163 instance ToJSON Proximity
164 instance FromJSON SeaElevation
165 instance ToJSON SeaElevation
166 instance FromJSON TimeUnit
167 instance ToJSON TimeUnit
168 instance FromJSON Clique
169 instance ToJSON Clique
170 instance FromJSON PhyloLabel
171 instance ToJSON PhyloLabel
172 instance FromJSON Tagger
173 instance ToJSON Tagger
174 instance FromJSON Sort
176 instance FromJSON Order
177 instance ToJSON Order
178 instance FromJSON Filter
179 instance ToJSON Filter
180 instance FromJSON SynchronyScope
181 instance ToJSON SynchronyScope
182 instance FromJSON SynchronyStrategy
183 instance ToJSON SynchronyStrategy
184 instance FromJSON Synchrony
185 instance ToJSON Synchrony
186 instance FromJSON Quality
187 instance ToJSON Quality
190 -- | Software parameters
192 Software { _software_name :: Text
193 , _software_version :: Text
194 } deriving (Generic, Show, Eq)
196 defaultSoftware :: Software
198 Software { _software_name = pack "Gargantext"
199 , _software_version = pack "v4" }
202 -- | Global parameters of a Phylo
204 PhyloParam { _phyloParam_version :: Text
205 , _phyloParam_software :: Software
206 , _phyloParam_config :: Config
207 } deriving (Generic, Show, Eq)
209 defaultPhyloParam :: PhyloParam
211 PhyloParam { _phyloParam_version = pack "v2.adaptative"
212 , _phyloParam_software = defaultSoftware
213 , _phyloParam_config = defaultConfig }
221 -- | Date : a simple Integer
224 -- | Ngrams : a contiguous sequence of n terms
227 -- | Document : a piece of Text linked to a Date
228 data Document = Document
231 } deriving (Eq,Show,Generic,NFData)
239 -- | The Foundations of a Phylo created from a given TermList.
-- Lenses for both fields are generated by 'makeLenses' and the JSON
-- instances by 'deriveJSON' (with the "_foundations_" prefix handed to 'unPrefix').
240 data PhyloFoundations = PhyloFoundations
241 { _foundations_roots :: !(Vector Ngrams) -- ^ strict field: the roots, a Vector of Ngrams
242 , _foundations_mapList :: TermList -- ^ the TermList kept alongside the roots (presumably the one the roots come from — confirm)
243 } deriving (Generic, Show, Eq)
246 ------------------------------
247 -- | Co-occurrence Matrix | --
248 ------------------------------
251 -- | Cooc : a co-occurrence matrix between two ngrams, stored as a Map
-- from a pair of Int to a Double (the Int are presumably ngram indices — TODO confirm)
252 type Cooc = Map (Int,Int) Double
260 -- | Phylo datatype of a phylomemy
261 -- foundations : the foundations of the phylo
262 -- timeCooc : a Map of co-occurrences by minimal unit of time (ex: by year)
263 -- timeDocs : a Map with the number of docs by minimal unit of time (ex: by year)
264 -- param : the parameters of the phylomemy (with the user's configuration)
265 -- periods : the temporal steps of a phylomemy
267 Phylo { _phylo_foundations :: PhyloFoundations
268 , _phylo_timeCooc :: !(Map Date Cooc)
269 , _phylo_timeDocs :: !(Map Date Double)
270 , _phylo_termFreq :: !(Map Int Double)
271 , _phylo_lastTermFreq :: !(Map Int Double)
272 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
273 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
274 , _phylo_param :: PhyloParam
275 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
277 deriving (Generic, Show, Eq)
280 -- | PhyloPeriodId : the id of a given period, a pair of Date
-- (the PhyloPeriod description reads it as (start date, end date))
281 type PhyloPeriodId = (Date,Date)
283 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
284 -- id: tuple (start date, end date) of the temporal step of the phylomemy
285 -- levels: levels of granularity
287 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
288 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
289 } deriving (Generic, Show, Eq)
292 -- | Level : a level of clustering
295 -- | PhyloLevelId : the id of a level of clustering in a given period,
-- i.e. the id of the period paired with the Level
296 type PhyloLevelId = (PhyloPeriodId,Level)
298 -- | PhyloLevel : levels of phylomemy on a synchronic axis
299 -- Levels description:
300 -- Level 0: The foundations and the base of the phylo
301 -- Level 1: First level of clustering (the Fis)
302 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
304 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
305 , _phylo_levelLevel :: Level
306 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
308 deriving (Generic, Show, Eq)
-- | PhyloGroupId : the id of a group, i.e. the id of its level paired with an Int
-- (presumably the index of the group inside that level — TODO confirm)
311 type PhyloGroupId = (PhyloLevelId, Int)
313 -- | BranchId : (a level, a sequence of branch index)
314 -- the sequence is a path of heritage from the most to the least specific branch
315 type PhyloBranchId = (Level, [Int])
317 -- | PhyloGroup : group of ngrams at each level and period
319 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
320 , _phylo_groupLevel :: Level
321 , _phylo_groupIndex :: Int
322 , _phylo_groupLabel :: Text
323 , _phylo_groupSupport :: Support
324 , _phylo_groupNgrams :: [Int]
325 , _phylo_groupCooc :: !(Cooc)
326 , _phylo_groupBranchId :: PhyloBranchId
327 , _phylo_groupMeta :: Map Text [Double]
328 , _phylo_groupLevelParents :: [Pointer]
329 , _phylo_groupLevelChilds :: [Pointer]
330 , _phylo_groupPeriodParents :: [Pointer]
331 , _phylo_groupPeriodChilds :: [Pointer]
332 , _phylo_groupAncestors :: [Pointer]
334 deriving (Generic, Show, Eq, NFData)
336 -- | Weight : A generic measure that can be associated with an Id
339 -- | Pointer : A weighted pointer to a given PhyloGroup,
-- i.e. the id of the target group paired with a Weight
340 type Pointer = (PhyloGroupId, Weight)
-- | Filiation : a two-valued selector, 'ToParents' or 'ToChilds'; derives only Generic and Show.
-- NOTE(review): presumably chooses between the parents and childs pointers of a PhyloGroup — confirm.
342 data Filiation = ToParents | ToChilds deriving (Generic, Show)
-- | PointerType : a two-valued selector, 'TemporalPointer' or 'LevelPointer'; derives only Generic and Show.
-- NOTE(review): presumably distinguishes the period pointers from the level pointers of a PhyloGroup — confirm.
343 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
346 ----------------------
347 -- | Phylo Clique | --
348 ----------------------
350 -- | Support : Number of Documents where a Clique occurs
-- | PhyloClique : a clique described by its nodes, its support and its period.
-- Lenses for the three fields are generated by 'makeLenses'.
353 data PhyloClique = PhyloClique
354 { _phyloClique_nodes :: [Int] -- ^ the nodes of the clique, as Int (presumably ngram indices — TODO confirm)
355 , _phyloClique_support :: Support -- ^ the Support of the clique
356 , _phyloClique_period :: (Date,Date) -- ^ the period of the clique, as a pair of Date
357 } deriving (Generic,NFData,Show,Eq)
-- | DotId : an identifier represented as lazy Text (Data.Text.Lazy)
-- NOTE(review): presumably the id of a node in the dot export — confirm.
363 type DotId = TextLazy.Text
-- | EdgeType : enumeration of the kinds of edges.
-- NOTE(review): each constructor is presumably named after the two endpoints the edge connects — confirm.
365 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
-- | Filter : an export filter, as listed in the 'exportFilter' field of the Config.
-- The only constructor, 'ByBranchSize', carries a Double in '_branch_size'.
-- NOTE(review): presumably a minimal branch size — confirm.
367 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
-- | Order : 'Asc' or 'Desc', carried by the '_sort_order' field of 'ByBirthDate'.
369 data Order = Asc | Desc deriving (Show,Generic,Eq)
-- | Sort : the sort used for the export, either 'ByBirthDate' with an Order or 'ByHierarchy'.
-- Caution: '_sort_order' is a partial record field, it only exists on 'ByBirthDate'.
371 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
-- | Tagger : the tagger carried by the '_branch_labelTagger' and '_group_labelTagger' label fields.
-- NOTE(review): the scoring behind each constructor is implemented elsewhere — confirm before relying on the names.
373 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
377 { _branch_labelTagger :: Tagger
378 , _branch_labelSize :: Int }
380 { _group_labelTagger :: Tagger
381 , _group_labelSize :: Int }
382 deriving (Show,Generic,Eq)
386 { _branch_id :: PhyloBranchId
387 , _branch_canonId :: [Int]
388 , _branch_seaLevel :: [Double]
389 , _branch_x :: Double
390 , _branch_y :: Double
391 , _branch_w :: Double
392 , _branch_t :: Double
393 , _branch_label :: Text
394 , _branch_meta :: Map Text [Double]
395 } deriving (Generic, Show, Eq)
399 { _export_groups :: [PhyloGroup]
400 , _export_branches :: [PhyloBranch]
401 } deriving (Generic, Show)
-- Template Haskell splices: 'makeLenses' (imported from Control.Lens) is invoked
-- once per record type listed below. Per the lens documentation it generates a
-- lens for each record field whose name starts with an underscore.
408 makeLenses ''Proximity
409 makeLenses ''SeaElevation
412 makeLenses ''PhyloLabel
413 makeLenses ''TimeUnit
414 makeLenses ''PhyloFoundations
415 makeLenses ''PhyloClique
417 makeLenses ''PhyloPeriod
418 makeLenses ''PhyloLevel
419 makeLenses ''PhyloGroup
420 makeLenses ''PhyloParam
421 makeLenses ''PhyloExport
422 makeLenses ''PhyloBranch
424 ------------------------
425 -- | JSON instances | --
426 ------------------------
-- Template Haskell splice deriving the ToJSON and FromJSON instances of PhyloFoundations
-- (Data.Aeson.TH.deriveJSON), with the options built by 'unPrefix "_foundations_"'.
-- NOTE(review): 'unPrefix' presumably strips that prefix from the JSON field names — confirm in Gargantext.Core.Utils.Prefix.
429 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)