2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Viz.AdaptativePhylo where
30 import Data.Aeson.TH (deriveJSON)
31 import Data.Text (Text, pack)
32 import Data.Vector (Vector)
36 import Gargantext.Core.Utils.Prefix (unPrefix)
37 import Gargantext.Prelude
38 import Gargantext.Text.Context (TermList)
41 import GHC.IO (FilePath)
42 import Control.DeepSeq (NFData)
43 import Control.Lens (makeLenses)
45 import qualified Data.Text.Lazy as TextLazy
54 Wos {_wos_limit :: Int}
55 | Csv {_csv_limit :: Int}
56 deriving (Show,Generic,Eq)
60 { _cons_start :: Double
61 , _cons_step :: Double }
63 { _adap_granularity :: Double }
64 deriving (Show,Generic,Eq)
68 { _wlj_sensibility :: Double
69 -- , _wlj_thresholdInit :: Double
70 -- , _wlj_thresholdStep :: Double
71 -- | max height for sea level in temporal matching
72 -- , _wlj_elevation :: Double
75 deriving (Show,Generic,Eq)
78 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
80 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
84 { _bpt_threshold :: Double
85 , _bpt_sensibility :: Double
86 , _bpt_scope :: SynchronyScope
87 , _bpt_strategy :: SynchronyStrategy }
88 | ByProximityDistribution
89 { _bpd_sensibility :: Double
90 , _bpd_strategy :: SynchronyStrategy }
91 deriving (Show,Generic,Eq)
98 , _year_matchingFrame :: Int }
99 deriving (Show,Generic,Eq)
104 { _fis_support :: Int
108 deriving (Show,Generic,Eq)
112 Quality { _qua_granularity :: Double
113 , _qua_minBranch :: Int }
114 deriving (Show,Generic,Eq)
118 Config { corpusPath :: FilePath
119 , listPath :: FilePath
120 , outputPath :: FilePath
121 , corpusParser :: CorpusParser
124 , phyloProximity :: Proximity
125 , seaElevation :: SeaElevation
126 , phyloSynchrony :: Synchrony
127 , phyloQuality :: Quality
128 , timeUnit :: TimeUnit
130 , exportLabel :: [PhyloLabel]
132 , exportFilter :: [Filter]
133 } deriving (Show,Generic,Eq)
136 defaultConfig :: Config
138 Config { corpusPath = ""
141 , corpusParser = Csv 1000
142 , phyloName = pack "Default Phylo"
144 , phyloProximity = WeightedLogJaccard 10
145 , seaElevation = Constante 0 0.1
146 , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
147 , phyloQuality = Quality 0.6 1
148 , timeUnit = Year 3 1 5
150 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
151 , exportSort = ByHierarchy
152 , exportFilter = [ByBranchSize 2]
155 instance FromJSON Config
156 instance ToJSON Config
157 instance FromJSON CorpusParser
158 instance ToJSON CorpusParser
159 instance FromJSON Proximity
160 instance ToJSON Proximity
161 instance FromJSON SeaElevation
162 instance ToJSON SeaElevation
163 instance FromJSON TimeUnit
164 instance ToJSON TimeUnit
165 instance FromJSON Clique
166 instance ToJSON Clique
167 instance FromJSON PhyloLabel
168 instance ToJSON PhyloLabel
169 instance FromJSON Tagger
170 instance ToJSON Tagger
171 instance FromJSON Sort
173 instance FromJSON Order
174 instance ToJSON Order
175 instance FromJSON Filter
176 instance ToJSON Filter
177 instance FromJSON SynchronyScope
178 instance ToJSON SynchronyScope
179 instance FromJSON SynchronyStrategy
180 instance ToJSON SynchronyStrategy
181 instance FromJSON Synchrony
182 instance ToJSON Synchrony
183 instance FromJSON Quality
184 instance ToJSON Quality
187 -- | Software parameters
189 Software { _software_name :: Text
190 , _software_version :: Text
191 } deriving (Generic, Show, Eq)
193 defaultSoftware :: Software
195 Software { _software_name = pack "Gargantext"
196 , _software_version = pack "v4" }
199 -- | Global parameters of a Phylo
201 PhyloParam { _phyloParam_version :: Text
202 , _phyloParam_software :: Software
203 , _phyloParam_config :: Config
204 } deriving (Generic, Show, Eq)
206 defaultPhyloParam :: PhyloParam
208 PhyloParam { _phyloParam_version = pack "v2.adaptative"
209 , _phyloParam_software = defaultSoftware
210 , _phyloParam_config = defaultConfig }
218 -- | Date : a simple Integer
221 -- | Ngrams : a contiguous sequence of n terms
224 -- | Document : a piece of Text linked to a Date
225 data Document = Document
228 } deriving (Eq,Show,Generic,NFData)
236 -- | The Foundations of a Phylo created from a given TermList
237 data PhyloFoundations = PhyloFoundations
238 { _foundations_roots :: !(Vector Ngrams)
239 , _foundations_mapList :: TermList
240 } deriving (Generic, Show, Eq)
243 ---------------------------
244 -- | Coocurency Matrix | --
245 ---------------------------
248 -- | Cooc : a coocurency matrix between two ngrams
249 type Cooc = Map (Int,Int) Double
257 -- | Phylo datatype of a phylomemy
258 -- foundations : the foundations of the phylo
259 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
260 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
261 -- param : the parameters of the phylomemy (with the user's configuration)
262 -- periods : the temporal steps of a phylomemy
264 Phylo { _phylo_foundations :: PhyloFoundations
265 , _phylo_timeCooc :: !(Map Date Cooc)
266 , _phylo_timeDocs :: !(Map Date Double)
267 , _phylo_termFreq :: !(Map Int Double)
268 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
269 , _phylo_param :: PhyloParam
270 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
272 deriving (Generic, Show, Eq)
275 -- | PhyloPeriodId : the id of a given period
276 type PhyloPeriodId = (Date,Date)
278 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
279 -- id: tuple (start date, end date) of the temporal step of the phylomemy
280 -- levels: levels of granularity
282 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
283 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
284 } deriving (Generic, Show, Eq)
287 -- | Level : a level of clustering
290 -- | PhyloLevelId : the id of a level of clustering in a given period
291 type PhyloLevelId = (PhyloPeriodId,Level)
293 -- | PhyloLevel : levels of phylomemy on a synchronic axis
294 -- Levels description:
295 -- Level 0: The foundations and the base of the phylo
296 -- Level 1: First level of clustering (the Fis)
297 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
299 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
300 , _phylo_levelLevel :: Level
301 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
303 deriving (Generic, Show, Eq)
306 type PhyloGroupId = (PhyloLevelId, Int)
308 -- | BranchId : (a level, a sequence of branch index)
309 -- the sequence is a path of heritage from the most to the less specific branch
310 type PhyloBranchId = (Level, [Int])
312 -- | PhyloGroup : group of ngrams at each level and period
314 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
315 , _phylo_groupLevel :: Level
316 , _phylo_groupIndex :: Int
317 , _phylo_groupLabel :: Text
318 , _phylo_groupSupport :: Support
319 , _phylo_groupNgrams :: [Int]
320 , _phylo_groupCooc :: !(Cooc)
321 , _phylo_groupBranchId :: PhyloBranchId
322 , _phylo_groupMeta :: Map Text [Double]
323 , _phylo_groupLevelParents :: [Pointer]
324 , _phylo_groupLevelChilds :: [Pointer]
325 , _phylo_groupPeriodParents :: [Pointer]
326 , _phylo_groupPeriodChilds :: [Pointer]
328 deriving (Generic, Show, Eq, NFData)
330 -- | Weight : A generic mesure that can be associated with an Id
333 -- | Pointer : A weighted pointer to a given PhyloGroup
334 type Pointer = (PhyloGroupId, Weight)
336 data Filiation = ToParents | ToChilds deriving (Generic, Show)
337 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
340 ----------------------
341 -- | Phylo Clique | --
342 ----------------------
344 -- | Support : Number of Documents where a Clique occurs
347 data PhyloClique = PhyloClique
348 { _phyloClique_nodes :: Set Ngrams
349 , _phyloClique_support :: Support
350 , _phyloClique_period :: (Date,Date)
351 } deriving (Generic,NFData,Show,Eq)
358 type DotId = TextLazy.Text
360 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
362 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
364 data Order = Asc | Desc deriving (Show,Generic,Eq)
366 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
368 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
372 { _branch_labelTagger :: Tagger
373 , _branch_labelSize :: Int }
375 { _group_labelTagger :: Tagger
376 , _group_labelSize :: Int }
377 deriving (Show,Generic,Eq)
381 { _branch_id :: PhyloBranchId
382 , _branch_canonId :: [Int]
383 , _branch_seaLevel :: [Double]
384 , _branch_x :: Double
385 , _branch_y :: Double
386 , _branch_w :: Double
387 , _branch_t :: Double
388 , _branch_label :: Text
389 , _branch_meta :: Map Text [Double]
390 } deriving (Generic, Show, Eq)
394 { _export_groups :: [PhyloGroup]
395 , _export_branches :: [PhyloBranch]
396 } deriving (Generic, Show)
403 makeLenses ''Proximity
404 makeLenses ''SeaElevation
407 makeLenses ''PhyloLabel
408 makeLenses ''TimeUnit
409 makeLenses ''PhyloFoundations
410 makeLenses ''PhyloClique
412 makeLenses ''PhyloPeriod
413 makeLenses ''PhyloLevel
414 makeLenses ''PhyloGroup
415 makeLenses ''PhyloParam
416 makeLenses ''PhyloExport
417 makeLenses ''PhyloBranch
419 ------------------------
420 -- | JSON instances | --
421 ------------------------
424 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)