2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
62 { _cons_start :: Double
63 , _cons_step :: Double }
65 { _adap_granularity :: Double }
66 deriving (Show,Generic,Eq)
70 { _wlj_sensibility :: Double
71 -- , _wlj_thresholdInit :: Double
72 -- , _wlj_thresholdStep :: Double
73 -- | max height for sea level in temporal matching
74 -- , _wlj_elevation :: Double
77 deriving (Show,Generic,Eq)
-- | SynchronyScope : an enumeration of three nullary constructors.
-- It is the type of the @_bpt_scope@ field, and the default configuration
-- in this module passes 'SiblingBranches'.
-- NOTE(review): the constructor names suggest which branches are taken into
-- account during synchronic clustering — confirm against the clustering code.
80 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
-- | SynchronyStrategy : an enumeration of two nullary constructors.
-- It is the type of both the @_bpt_strategy@ and @_bpd_strategy@ fields, and
-- the default configuration in this module passes 'MergeAllGroups'.
-- NOTE(review): presumably selects which groups get merged during synchronic
-- clustering — confirm against the clustering code.
82 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
86 { _bpt_threshold :: Double
87 , _bpt_sensibility :: Double
88 , _bpt_scope :: SynchronyScope
89 , _bpt_strategy :: SynchronyStrategy }
90 | ByProximityDistribution
91 { _bpd_sensibility :: Double
92 , _bpd_strategy :: SynchronyStrategy }
93 deriving (Show,Generic,Eq)
100 , _year_matchingFrame :: Int }
101 deriving (Show,Generic,Eq)
106 { _fis_support :: Int
110 deriving (Show,Generic,Eq)
114 Quality { _qua_granularity :: Double
115 , _qua_minBranch :: Int }
116 deriving (Show,Generic,Eq)
120 Config { corpusPath :: FilePath
121 , listPath :: FilePath
122 , outputPath :: FilePath
123 , corpusParser :: CorpusParser
126 , phyloProximity :: Proximity
127 , seaElevation :: SeaElevation
128 , phyloSynchrony :: Synchrony
129 , phyloQuality :: Quality
130 , timeUnit :: TimeUnit
132 , exportLabel :: [PhyloLabel]
134 , exportFilter :: [Filter]
135 } deriving (Show,Generic,Eq)
138 defaultConfig :: Config
140 Config { corpusPath = ""
143 , corpusParser = Csv 1000
144 , phyloName = pack "Default Phylo"
146 , phyloProximity = WeightedLogJaccard 10
147 , seaElevation = Constante 0 0.1
148 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
149 , phyloQuality = Quality 1 1
150 , timeUnit = Year 3 1 5
152 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
153 , exportSort = ByHierarchy
154 , exportFilter = [ByBranchSize 2]
-- JSON (de)serialisation for the configuration types.
-- Every instance below has an empty body, so no method is overridden and the
-- class default implementations are used.
-- NOTE(review): presumably these are aeson's Generic-based defaults (the
-- types derive Generic); the import that brings FromJSON / ToJSON into scope
-- is not visible here — confirm.
157 instance FromJSON Config
158 instance ToJSON Config
159 instance FromJSON CorpusParser
160 instance ToJSON CorpusParser
161 instance FromJSON Proximity
162 instance ToJSON Proximity
163 instance FromJSON SeaElevation
164 instance ToJSON SeaElevation
165 instance FromJSON TimeUnit
166 instance ToJSON TimeUnit
167 instance FromJSON Clique
168 instance ToJSON Clique
169 instance FromJSON PhyloLabel
170 instance ToJSON PhyloLabel
171 instance FromJSON Tagger
172 instance ToJSON Tagger
173 instance FromJSON Sort
175 instance FromJSON Order
176 instance ToJSON Order
177 instance FromJSON Filter
178 instance ToJSON Filter
179 instance FromJSON SynchronyScope
180 instance ToJSON SynchronyScope
181 instance FromJSON SynchronyStrategy
182 instance ToJSON SynchronyStrategy
183 instance FromJSON Synchrony
184 instance ToJSON Synchrony
185 instance FromJSON Quality
186 instance ToJSON Quality
189 -- | Software parameters
191 Software { _software_name :: Text
192 , _software_version :: Text
193 } deriving (Generic, Show, Eq)
195 defaultSoftware :: Software
197 Software { _software_name = pack "Gargantext"
198 , _software_version = pack "v4" }
201 -- | Global parameters of a Phylo
203 PhyloParam { _phyloParam_version :: Text
204 , _phyloParam_software :: Software
205 , _phyloParam_config :: Config
206 } deriving (Generic, Show, Eq)
208 defaultPhyloParam :: PhyloParam
210 PhyloParam { _phyloParam_version = pack "v2.adaptative"
211 , _phyloParam_software = defaultSoftware
212 , _phyloParam_config = defaultConfig }
220 -- | Date : a simple Integer
223 -- | Ngrams : a contiguous sequence of n terms
226 -- | Document : a piece of Text linked to a Date
227 data Document = Document
230 } deriving (Eq,Show,Generic,NFData)
238 -- | The Foundations of a Phylo, created from a given TermList.
-- A two-field record; its lenses are generated by a @makeLenses@ splice and
-- its JSON instances by a @deriveJSON@ splice elsewhere in this module.
239 data PhyloFoundations = PhyloFoundations
240 { _foundations_roots :: !(Vector Ngrams) -- ^ strict field (bang): forced to WHNF when the record is built
241 , _foundations_mapList :: TermList -- ^ lazy field; 'TermList' is imported from "Gargantext.Text.Context"
242 } deriving (Generic, Show, Eq)
245 ------------------------------
246 -- | Co-occurrence Matrix | --
247 ------------------------------
250 -- | Cooc : a co-occurrence matrix between pairs of ngrams.
-- Stored sparsely as a 'Map' from a pair of 'Int' to a 'Double'.
-- NOTE(review): the 'Int's are presumably ngram indices and the 'Double' the
-- co-occurrence weight of the pair — confirm against the code that builds it.
251 type Cooc = Map (Int,Int) Double
259 -- | Phylo datatype of a phylomemy
260 -- foundations : the foundations of the phylo
261 -- timeCooc : a Map of co-occurrence matrices by minimal unit of time (ex: by year)
262 -- timeDocs : a Map with the number of docs by minimal unit of time (ex: by year)
263 -- param : the parameters of the phylomemy (with the user's configuration)
264 -- periods : the temporal steps of a phylomemy
266 Phylo { _phylo_foundations :: PhyloFoundations
267 , _phylo_timeCooc :: !(Map Date Cooc)
268 , _phylo_timeDocs :: !(Map Date Double)
269 , _phylo_termFreq :: !(Map Int Double)
270 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
271 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
272 , _phylo_param :: PhyloParam
273 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
275 deriving (Generic, Show, Eq)
278 -- | PhyloPeriodId : the id of a given period, a pair of 'Date's.
-- It is the key type of the @_phylo_periods@ map and the first component of
-- 'PhyloLevelId'.
279 type PhyloPeriodId = (Date,Date)
281 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
282 -- id: tuple (start date, end date) of the temporal step of the phylomemy
283 -- levels: levels of granularity
285 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
286 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
287 } deriving (Generic, Show, Eq)
290 -- | Level : a level of clustering
293 -- | PhyloLevelId : the id of a level of clustering in a given period.
-- A pair made of the period id and the 'Level'; it is the key type of the
-- @_phylo_periodLevels@ map and the first component of 'PhyloGroupId'.
294 type PhyloLevelId = (PhyloPeriodId,Level)
296 -- | PhyloLevel : levels of phylomemy on a synchronic axis
297 -- Levels description:
298 -- Level 0: The foundations and the base of the phylo
299 -- Level 1: First level of clustering (the Fis)
300 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
302 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
303 , _phylo_levelLevel :: Level
304 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
306 deriving (Generic, Show, Eq)
-- | PhyloGroupId : the id of a group, a pair made of a 'PhyloLevelId' and an 'Int'.
-- It is the key type of the @_phylo_levelGroups@ map and the target type of a 'Pointer'.
-- NOTE(review): the 'Int' is presumably the group's index inside its level
-- (cf. @_phylo_groupIndex@) — confirm.
309 type PhyloGroupId = (PhyloLevelId, Int)
311 -- | PhyloBranchId : (a level, a sequence of branch indexes)
312 -- the sequence is a path of inheritance from the most to the least specific branch
313 type PhyloBranchId = (Level, [Int])
315 -- | PhyloGroup : group of ngrams at each level and period
317 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
318 , _phylo_groupLevel :: Level
319 , _phylo_groupIndex :: Int
320 , _phylo_groupLabel :: Text
321 , _phylo_groupSupport :: Support
322 , _phylo_groupNgrams :: [Int]
323 , _phylo_groupCooc :: !(Cooc)
324 , _phylo_groupBranchId :: PhyloBranchId
325 , _phylo_groupMeta :: Map Text [Double]
326 , _phylo_groupLevelParents :: [Pointer]
327 , _phylo_groupLevelChilds :: [Pointer]
328 , _phylo_groupPeriodParents :: [Pointer]
329 , _phylo_groupPeriodChilds :: [Pointer]
331 deriving (Generic, Show, Eq, NFData)
333 -- | Weight : A generic measure that can be associated with an Id
336 -- | Pointer : A weighted pointer to a given PhyloGroup.
-- A pair (id of the target group, 'Weight'); it is the element type of the
-- @_phylo_groupLevelParents@, @_phylo_groupLevelChilds@,
-- @_phylo_groupPeriodParents@ and @_phylo_groupPeriodChilds@ lists.
337 type Pointer = (PhyloGroupId, Weight)
-- | Filiation : an enumeration of two nullary constructors.
-- Unlike most types of this module it derives only 'Generic' and 'Show'
-- (no 'Eq'), so its values cannot be compared with @==@.
-- NOTE(review): presumably selects the direction of a link (towards the
-- parents or towards the childs of a group) — confirm at the call sites.
339 data Filiation = ToParents | ToChilds deriving (Generic, Show)
-- | PointerType : an enumeration of two nullary constructors.
-- Unlike most types of this module it derives only 'Generic' and 'Show'
-- (no 'Eq'), so its values cannot be compared with @==@.
-- NOTE(review): presumably distinguishes pointers between periods from
-- pointers between levels (cf. the @_phylo_groupPeriod*@ and
-- @_phylo_groupLevel*@ pointer lists) — confirm at the call sites.
340 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
343 ----------------------
344 -- | Phylo Clique | --
345 ----------------------
347 -- | Support : Number of Documents where a Clique occurs
-- | PhyloClique : a set of ngrams together with its support and its period.
-- 'NFData' is obtained through the deriving clause (DeriveAnyClass is
-- enabled at the top of the module); lenses come from a @makeLenses@ splice
-- elsewhere in this module.
350 data PhyloClique = PhyloClique
351 { _phyloClique_nodes :: Set Ngrams -- ^ the ngrams that make up the clique
352 , _phyloClique_support :: Support -- ^ 'Support' is described in this module as the number of documents where a clique occurs
353 , _phyloClique_period :: (Date,Date) -- ^ a pair of dates; presumably (start, end) as for 'PhyloPeriodId' — confirm
354 } deriving (Generic,NFData,Show,Eq)
357 ------------------------
358 -- | Phylo Ancestor | --
359 ------------------------
-- | PhyloAncestor : a record with an 'Int' id, a list of 'Int' and a list of
-- group ids. It is the element type of the @_export_ancestors@ list.
361 data PhyloAncestor = PhyloAncestor
362 { _phyloAncestor_id :: Int -- ^ identifier of the ancestor
363 , _phyloAncestor_ngrams :: [Int] -- ^ presumably ngram indices, as in @_phylo_groupNgrams@ — confirm
364 , _phyloAncestor_groups :: [PhyloGroupId] -- ^ ids of the groups attached to this ancestor
365 } deriving (Generic,NFData,Show,Eq)
-- | DotId : an alias for lazy 'Data.Text.Lazy.Text' (imported qualified as @TextLazy@).
-- NOTE(review): presumably the identifier of a node in the Graphviz DOT export — confirm.
371 type DotId = TextLazy.Text
-- | EdgeType : an enumeration of four nullary constructors.
-- NOTE(review): each name presumably states which two kinds of nodes an
-- exported edge connects — confirm against the export code.
373 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
-- | Filter : an export filter; element type of the @exportFilter@ list of the configuration.
-- The single constructor carries a 'Double' in @_branch_size@; the default
-- configuration in this module uses @[ByBranchSize 2]@.
-- NOTE(review): presumably a minimal branch size below which branches are
-- dropped from the export — confirm.
375 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
-- | Order : an enumeration of two nullary constructors; it is the type of
-- the @_sort_order@ field of 'Sort'.
-- NOTE(review): presumably ascending / descending — confirm.
377 data Order = Asc | Desc deriving (Show,Generic,Eq)
-- | Sort : how the export is sorted; the default configuration in this
-- module uses 'ByHierarchy'.
-- Only 'ByBirthDate' carries an 'Order', so the generated selector
-- @_sort_order@ is partial: applying it to 'ByHierarchy' fails at runtime.
379 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
-- | Tagger : an enumeration of two nullary constructors; it is the type of
-- the @_branch_labelTagger@ and @_group_labelTagger@ fields. The default
-- configuration in this module uses 'MostInclusive' for branch labels and
-- 'MostEmergentInclusive' for group labels.
-- NOTE(review): presumably the strategy used to pick the ngrams of a label — confirm.
381 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
385 { _branch_labelTagger :: Tagger
386 , _branch_labelSize :: Int }
388 { _group_labelTagger :: Tagger
389 , _group_labelSize :: Int }
390 deriving (Show,Generic,Eq)
394 { _branch_id :: PhyloBranchId
395 , _branch_canonId :: [Int]
396 , _branch_seaLevel :: [Double]
397 , _branch_x :: Double
398 , _branch_y :: Double
399 , _branch_w :: Double
400 , _branch_t :: Double
401 , _branch_label :: Text
402 , _branch_meta :: Map Text [Double]
403 } deriving (Generic, Show, Eq)
407 { _export_groups :: [PhyloGroup]
408 , _export_branches :: [PhyloBranch]
409 , _export_ancestors :: [PhyloAncestor]
410 } deriving (Generic, Show)
-- Template Haskell splices: 'makeLenses' (from "Control.Lens") generates one
-- lens per record field whose name starts with an underscore; the lens is
-- named after the field with that underscore removed
-- (e.g. @_phylo_groupLabel@ yields @phylo_groupLabel@).
-- The splices have to stay below the declarations of the types they name.
417 makeLenses ''Proximity
418 makeLenses ''SeaElevation
421 makeLenses ''PhyloLabel
422 makeLenses ''TimeUnit
423 makeLenses ''PhyloFoundations
424 makeLenses ''PhyloClique
426 makeLenses ''PhyloPeriod
427 makeLenses ''PhyloLevel
428 makeLenses ''PhyloGroup
429 makeLenses ''PhyloParam
430 makeLenses ''PhyloExport
431 makeLenses ''PhyloBranch
433 ------------------------
434 -- | JSON instances | --
435 ------------------------
-- Template Haskell splice: 'deriveJSON' (from "Data.Aeson.TH") generates both
-- the ToJSON and the FromJSON instance of 'PhyloFoundations', using the
-- options returned by @unPrefix "_foundations_"@.
-- NOTE(review): 'unPrefix' (from "Gargantext.Core.Utils.Prefix") presumably
-- strips the given prefix from the field names used as JSON keys — confirm.
438 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)