2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
30 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
31 import Control.DeepSeq (NFData)
32 import Control.Lens (makeLenses)
34 import Data.Aeson.TH (deriveJSON)
36 import Data.Text (Text, pack)
37 import Data.Vector (Vector)
39 import GHC.IO (FilePath)
40 import Gargantext.Core.Text.Context (TermList)
41 import Gargantext.Core.Utils.Prefix (unPrefix)
42 import Gargantext.Prelude
43 import qualified Data.Text.Lazy as TextLazy
50 Wos {_wos_limit :: Int}
51 | Csv {_csv_limit :: Int}
52 | Csv' {_csv'_limit :: Int}
53 deriving (Show,Generic,Eq)
55 instance ToSchema CorpusParser where
56 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
59 data ListParser = V3 | V4 deriving (Show,Generic,Eq)
60 instance ToSchema ListParser
65 { _cons_start :: Double
66 , _cons_step :: Double }
68 { _adap_granularity :: Double }
69 deriving (Show,Generic,Eq)
71 instance ToSchema SeaElevation
75 { _wlj_sensibility :: Double
77 -- , _wlj_thresholdInit :: Double
78 -- , _wlj_thresholdStep :: Double
79 -- | max height for sea level in temporal matching
80 -- , _wlj_elevation :: Double
84 { _wlj_sensibility :: Double
86 -- , _wlj_thresholdInit :: Double
87 -- , _wlj_thresholdStep :: Double
88 -- | max height for sea level in temporal matching
89 -- , _wlj_elevation :: Double
92 | Hamming { _wlj_sensibility :: Double }
94 deriving (Show,Generic,Eq)
96 instance ToSchema Proximity where
97 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
100 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
101 deriving (Show,Generic,Eq, ToSchema)
103 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
104 deriving (Show,Generic,Eq)
106 instance ToSchema SynchronyStrategy where
107 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
112 { _bpt_threshold :: Double
113 , _bpt_sensibility :: Double
114 , _bpt_scope :: SynchronyScope
115 , _bpt_strategy :: SynchronyStrategy }
116 | ByProximityDistribution
117 { _bpd_sensibility :: Double
118 , _bpd_strategy :: SynchronyStrategy }
119 deriving (Show,Generic,Eq)
121 instance ToSchema Synchrony where
122 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
128 { _epoch_period :: Int
130 , _epoch_matchingFrame :: Int }
132 { _year_period :: Int
134 , _year_matchingFrame :: Int }
136 { _month_period :: Int
138 , _month_matchingFrame :: Int }
140 { _week_period :: Int
142 , _week_matchingFrame :: Int }
146 , _day_matchingFrame :: Int }
147 deriving (Show,Generic,Eq)
149 instance ToSchema TimeUnit where
150 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
153 data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
155 instance ToSchema CliqueFilter where
156 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
162 { _fis_support :: Int
166 , _mcl_threshold :: Double
167 , _mcl_filter :: CliqueFilter }
168 deriving (Show,Generic,Eq)
170 instance ToSchema Clique where
171 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
175 Quality { _qua_granularity :: Double
176 , _qua_minBranch :: Int }
177 deriving (Show,Generic,Eq)
179 instance ToSchema Quality where
180 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
185 Config { corpusPath :: FilePath
186 , listPath :: FilePath
187 , outputPath :: FilePath
188 , corpusParser :: CorpusParser
189 , listParser :: ListParser
192 , phyloProximity :: Proximity
193 , seaElevation :: SeaElevation
194 , findAncestors :: Bool
195 , phyloSynchrony :: Synchrony
196 , phyloQuality :: Quality
197 , timeUnit :: TimeUnit
199 , exportLabel :: [PhyloLabel]
201 , exportFilter :: [Filter]
202 } deriving (Show,Generic,Eq)
204 instance ToSchema Config
207 defaultConfig :: Config
209 Config { corpusPath = "corpus.csv" -- useful for commandline only
210 , listPath = "list.csv" -- useful for commandline only
211 , outputPath = "data/"
212 , corpusParser = Csv 100000
214 , phyloName = pack "Phylo Name"
216 , phyloProximity = WeightedLogJaccard 0.5
217 , seaElevation = Constante 0.1 0.1
218 , findAncestors = False
219 , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
220 , phyloQuality = Quality 0.5 1
221 , timeUnit = Year 3 1 5
222 , clique = MaxClique 5 0.0001 ByThreshold
223 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
224 , exportSort = ByHierarchy Desc
225 , exportFilter = [ByBranchSize 3]
228 instance FromJSON Config
229 instance ToJSON Config
231 instance FromJSON CorpusParser
232 instance ToJSON CorpusParser
234 instance FromJSON ListParser
235 instance ToJSON ListParser
237 instance FromJSON Proximity
238 instance ToJSON Proximity
240 instance FromJSON SeaElevation
241 instance ToJSON SeaElevation
243 instance FromJSON TimeUnit
244 instance ToJSON TimeUnit
246 instance FromJSON CliqueFilter
247 instance ToJSON CliqueFilter
249 instance FromJSON Clique
250 instance ToJSON Clique
252 instance FromJSON PhyloLabel
253 instance ToJSON PhyloLabel
255 instance FromJSON Tagger
256 instance ToJSON Tagger
258 instance FromJSON Sort
261 instance FromJSON Order
262 instance ToJSON Order
264 instance FromJSON Filter
265 instance ToJSON Filter
267 instance FromJSON SynchronyScope
268 instance ToJSON SynchronyScope
270 instance FromJSON SynchronyStrategy
271 instance ToJSON SynchronyStrategy
273 instance FromJSON Synchrony
274 instance ToJSON Synchrony
276 instance FromJSON Quality
277 instance ToJSON Quality
280 -- | Software parameters
282 Software { _software_name :: Text
283 , _software_version :: Text
284 } deriving (Generic, Show, Eq)
286 instance ToSchema Software where
287 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
291 defaultSoftware :: Software
293 Software { _software_name = pack "Gargantext"
294 , _software_version = pack "v4" }
297 -- | Global parameters of a Phylo
299 PhyloParam { _phyloParam_version :: Text
300 , _phyloParam_software :: Software
301 , _phyloParam_config :: Config
302 } deriving (Generic, Show, Eq)
304 instance ToSchema PhyloParam where
305 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
309 defaultPhyloParam :: PhyloParam
311 PhyloParam { _phyloParam_version = pack "v2.adaptative"
312 , _phyloParam_software = defaultSoftware
313 , _phyloParam_config = defaultConfig }
320 -- | Date : a simple Integer
323 -- | Ngrams : a contiguous sequence of n terms
326 -- Document : a piece of Text linked to a Date
327 -- date = computational date; date' = original string date yyyy-mm-dd
328 -- Export Database to Document
329 data Document = Document
330 { date :: Date -- datatype Date {unDate :: Int}
331 , date' :: Text -- show date
333 , weight :: Maybe Double
335 } deriving (Eq,Show,Generic,NFData)
343 -- | The Foundations of a Phylo created from a given TermList
344 data PhyloFoundations = PhyloFoundations
345 { _foundations_roots :: !(Vector Ngrams)
346 , _foundations_mapList :: TermList
347 } deriving (Generic, Show, Eq)
349 instance ToSchema PhyloFoundations where
350 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
354 data PhyloSources = PhyloSources
355 { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
357 instance ToSchema PhyloSources where
358 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
360 ---------------------------
361 -- | Coocurency Matrix | --
362 ---------------------------
365 -- | Cooc : a coocurency matrix between two ngrams
366 type Cooc = Map (Int,Int) Double
374 -- | Phylo datatype of a phylomemy
375 -- foundations : the foundations of the phylo
376 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
377 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
378 -- param : the parameters of the phylomemy (with the user's configuration)
379 -- periods : the temporal steps of a phylomemy
381 Phylo { _phylo_foundations :: PhyloFoundations
382 , _phylo_sources :: PhyloSources
383 , _phylo_timeCooc :: !(Map Date Cooc)
384 , _phylo_timeDocs :: !(Map Date Double)
385 , _phylo_termFreq :: !(Map Int Double)
386 , _phylo_lastTermFreq :: !(Map Int Double)
387 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
388 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
389 , _phylo_param :: PhyloParam
390 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
392 deriving (Generic, Show, Eq)
394 instance ToSchema Phylo where
395 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
398 -- | PhyloPeriodId : the id of a given period
399 type PhyloPeriodId = (Date,Date)
401 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
402 -- id: tuple (start date, end date) of the temporal step of the phylomemy
403 -- levels: levels of granularity
405 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
406 , _phylo_periodPeriod' :: (Text,Text)
407 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
408 } deriving (Generic, Show, Eq)
410 instance ToSchema PhyloPeriod where
411 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
415 -- | Level : a level of clustering
418 -- | PhyloLevelId : the id of a level of clustering in a given period
419 type PhyloLevelId = (PhyloPeriodId,Level)
421 -- | PhyloLevel : levels of phylomemy on a synchronic axis
422 -- Levels description:
423 -- Level 0: The foundations and the base of the phylo
424 -- Level 1: First level of clustering (the Fis)
425 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
427 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
428 , _phylo_levelPeriod' :: (Text,Text)
429 , _phylo_levelLevel :: Level
430 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
432 deriving (Generic, Show, Eq)
434 instance ToSchema PhyloLevel where
435 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
438 type PhyloGroupId = (PhyloLevelId, Int)
440 -- | BranchId : (a level, a sequence of branch index)
441 -- the sequence is a path of heritage from the most to the less specific branch
442 type PhyloBranchId = (Level, [Int])
444 -- | PhyloGroup : group of ngrams at each level and period
446 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
447 , _phylo_groupPeriod' :: (Text,Text)
448 , _phylo_groupLevel :: Level
449 , _phylo_groupIndex :: Int
450 , _phylo_groupLabel :: Text
451 , _phylo_groupSupport :: Support
452 , _phylo_groupWeight :: Maybe Double
453 , _phylo_groupSources :: [Int]
454 , _phylo_groupNgrams :: [Int]
455 , _phylo_groupCooc :: !(Cooc)
456 , _phylo_groupBranchId :: PhyloBranchId
457 , _phylo_groupMeta :: Map Text [Double]
458 , _phylo_groupLevelParents :: [Pointer]
459 , _phylo_groupLevelChilds :: [Pointer]
460 , _phylo_groupPeriodParents :: [Pointer]
461 , _phylo_groupPeriodChilds :: [Pointer]
462 , _phylo_groupAncestors :: [Pointer]
463 , _phylo_groupPeriodMemoryParents :: [Pointer']
464 , _phylo_groupPeriodMemoryChilds :: [Pointer']
466 deriving (Generic, Show, Eq, NFData)
468 instance ToSchema PhyloGroup where
469 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
472 -- | Weight : A generic mesure that can be associated with an Id
476 -- | Pointer : A weighted pointer to a given PhyloGroup
477 type Pointer = (PhyloGroupId, Weight)
478 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
479 type Pointer' = (PhyloGroupId, (Thr,Weight))
481 data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
482 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
485 ----------------------
486 -- | Phylo Clique | --
487 ----------------------
489 -- | Support : Number of Documents where a Clique occurs
492 data PhyloClique = PhyloClique
493 { _phyloClique_nodes :: [Int]
494 , _phyloClique_support :: Support
495 , _phyloClique_period :: (Date,Date)
496 , _phyloClique_weight :: Maybe Double
497 , _phyloClique_sources :: [Int]
498 } deriving (Generic,NFData,Show,Eq)
504 type DotId = TextLazy.Text
506 data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
508 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
509 instance ToSchema Filter where
510 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
513 data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
515 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
516 instance ToSchema Sort where
517 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
520 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
521 instance ToSchema Tagger where
522 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
527 { _branch_labelTagger :: Tagger
528 , _branch_labelSize :: Int }
530 { _group_labelTagger :: Tagger
531 , _group_labelSize :: Int }
532 deriving (Show,Generic,Eq)
534 instance ToSchema PhyloLabel where
535 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
540 { _branch_id :: PhyloBranchId
541 , _branch_canonId :: [Int]
542 , _branch_seaLevel :: [Double]
543 , _branch_x :: Double
544 , _branch_y :: Double
545 , _branch_w :: Double
546 , _branch_t :: Double
547 , _branch_label :: Text
548 , _branch_meta :: Map Text [Double]
549 } deriving (Generic, Show, Eq)
551 instance ToSchema PhyloBranch where
552 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
556 { _export_groups :: [PhyloGroup]
557 , _export_branches :: [PhyloBranch]
558 } deriving (Generic, Show)
559 instance ToSchema PhyloExport where
560 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
568 makeLenses ''Proximity
569 makeLenses ''SeaElevation
572 makeLenses ''PhyloLabel
573 makeLenses ''TimeUnit
574 makeLenses ''PhyloFoundations
575 makeLenses ''PhyloClique
577 makeLenses ''PhyloPeriod
578 makeLenses ''PhyloLevel
579 makeLenses ''PhyloGroup
580 makeLenses ''PhyloParam
581 makeLenses ''PhyloExport
582 makeLenses ''PhyloBranch
584 ------------------------
585 -- | JSON instances | --
586 ------------------------
588 instance FromJSON Phylo
589 instance ToJSON Phylo
591 instance FromJSON PhyloSources
592 instance ToJSON PhyloSources
594 instance FromJSON PhyloParam
595 instance ToJSON PhyloParam
597 instance FromJSON PhyloPeriod
598 instance ToJSON PhyloPeriod
600 instance FromJSON PhyloLevel
601 instance ToJSON PhyloLevel
603 instance FromJSON Software
604 instance ToJSON Software
606 instance FromJSON PhyloGroup
607 instance ToJSON PhyloGroup
609 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)