2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
29 import Control.DeepSeq (NFData)
30 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
35 import Data.Text (Text, pack)
36 import Data.Vector (Vector)
38 import GHC.IO (FilePath)
39 import Gargantext.Core.Text.Context (TermList)
40 import Gargantext.Core.Utils.Prefix (unPrefix)
41 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
42 import Gargantext.Prelude
43 import qualified Data.Text.Lazy as TextLazy
50 Wos {_wos_limit :: Int}
51 | Csv {_csv_limit :: Int}
52 | Csv' {_csv'_limit :: Int}
53 deriving (Show,Generic,Eq)
55 instance ToSchema CorpusParser where
56 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
59 data ListParser = V3 | V4 deriving (Show,Generic,Eq)
60 instance ToSchema ListParser
65 { _cons_start :: Double
66 , _cons_step :: Double }
68 { _adap_granularity :: Double }
69 deriving (Show,Generic,Eq)
71 instance ToSchema SeaElevation
75 { _wlj_sensibility :: Double
77 -- , _wlj_thresholdInit :: Double
78 -- , _wlj_thresholdStep :: Double
79 -- | max height for sea level in temporal matching
80 -- , _wlj_elevation :: Double
84 { _wlj_sensibility :: Double
86 -- , _wlj_thresholdInit :: Double
87 -- , _wlj_thresholdStep :: Double
88 -- | max height for sea level in temporal matching
89 -- , _wlj_elevation :: Double
92 | Hamming { _wlj_sensibility :: Double }
94 deriving (Show,Generic,Eq)
96 instance ToSchema Proximity where
97 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
100 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
101 deriving (Show,Generic,Eq, ToSchema)
103 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
104 deriving (Show,Generic,Eq)
106 instance ToSchema SynchronyStrategy where
107 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
112 { _bpt_threshold :: Double
113 , _bpt_sensibility :: Double
114 , _bpt_scope :: SynchronyScope
115 , _bpt_strategy :: SynchronyStrategy }
116 | ByProximityDistribution
117 { _bpd_sensibility :: Double
118 , _bpd_strategy :: SynchronyStrategy }
119 deriving (Show,Generic,Eq)
121 instance ToSchema Synchrony where
122 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
128 { _epoch_period :: Int
130 , _epoch_matchingFrame :: Int }
132 { _year_period :: Int
134 , _year_matchingFrame :: Int }
136 { _month_period :: Int
138 , _month_matchingFrame :: Int }
140 { _week_period :: Int
142 , _week_matchingFrame :: Int }
146 , _day_matchingFrame :: Int }
147 deriving (Show,Generic,Eq)
149 instance ToSchema TimeUnit where
150 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
153 data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
155 instance ToSchema CliqueFilter where
156 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
162 { _fis_support :: Int
166 , _mcl_threshold :: Double
167 , _mcl_filter :: CliqueFilter }
168 deriving (Show,Generic,Eq)
170 instance ToSchema Clique where
171 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
175 Quality { _qua_granularity :: Double
176 , _qua_minBranch :: Int }
177 deriving (Show,Generic,Eq)
179 instance ToSchema Quality where
180 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
184 PhyloConfig { corpusPath :: FilePath
185 , listPath :: FilePath
186 , outputPath :: FilePath
187 , corpusParser :: CorpusParser
188 , listParser :: ListParser
191 , phyloProximity :: Proximity
192 , seaElevation :: SeaElevation
193 , findAncestors :: Bool
194 , phyloSynchrony :: Synchrony
195 , phyloQuality :: Quality
196 , timeUnit :: TimeUnit
198 , exportLabel :: [PhyloLabel]
200 , exportFilter :: [Filter]
201 } deriving (Show,Generic,Eq)
204 ------------------------------------------------------------------------
205 data PhyloSubConfig =
206 PhyloSubConfig { _sc_phyloProximity :: Double
207 , _sc_phyloSynchrony :: Double
208 , _sc_phyloQuality :: Double
209 , _sc_timeUnit :: TimeUnit
210 , _sc_clique :: Clique
211 , _sc_exportFilter :: Double
213 deriving (Show,Generic,Eq)
216 subConfig2config :: PhyloSubConfig -> PhyloConfig
217 subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard $ _sc_phyloProximity subConfig
218 , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
219 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
220 , timeUnit = _sc_timeUnit subConfig
221 , clique = _sc_clique subConfig
222 , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
225 ------------------------------------------------------------------------
226 defaultConfig :: PhyloConfig
228 PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
229 , listPath = "list.csv" -- useful for commandline only
230 , outputPath = "data/"
231 , corpusParser = Csv 100000
233 , phyloName = pack "Phylo Name"
235 , phyloProximity = WeightedLogJaccard 0.5
236 , seaElevation = Constante 0.1 0.1
237 , findAncestors = False
238 , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
239 , phyloQuality = Quality 0.5 1
240 , timeUnit = Year 3 1 5
241 , clique = MaxClique 5 0.0001 ByThreshold
242 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
243 , exportSort = ByHierarchy Desc
244 , exportFilter = [ByBranchSize 3]
248 instance ToSchema PhyloConfig
249 instance ToSchema PhyloSubConfig
251 instance FromJSON PhyloConfig
252 instance ToJSON PhyloConfig
254 instance FromJSON PhyloSubConfig
255 instance ToJSON PhyloSubConfig
257 instance FromJSON CorpusParser
258 instance ToJSON CorpusParser
260 instance FromJSON ListParser
261 instance ToJSON ListParser
263 instance FromJSON Proximity
264 instance ToJSON Proximity
266 instance FromJSON SeaElevation
267 instance ToJSON SeaElevation
269 instance FromJSON TimeUnit
270 instance ToJSON TimeUnit
272 instance FromJSON CliqueFilter
273 instance ToJSON CliqueFilter
275 instance FromJSON Clique
276 instance ToJSON Clique
278 instance FromJSON PhyloLabel
279 instance ToJSON PhyloLabel
281 instance FromJSON Tagger
282 instance ToJSON Tagger
284 instance FromJSON Sort
287 instance FromJSON Order
288 instance ToJSON Order
290 instance FromJSON Filter
291 instance ToJSON Filter
293 instance FromJSON SynchronyScope
294 instance ToJSON SynchronyScope
296 instance FromJSON SynchronyStrategy
297 instance ToJSON SynchronyStrategy
299 instance FromJSON Synchrony
300 instance ToJSON Synchrony
302 instance FromJSON Quality
303 instance ToJSON Quality
306 -- | Software parameters
308 Software { _software_name :: Text
309 , _software_version :: Text
310 } deriving (Generic, Show, Eq)
312 instance ToSchema Software where
313 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
317 defaultSoftware :: Software
319 Software { _software_name = pack "Gargantext"
320 , _software_version = pack "v4" }
323 -- | Global parameters of a Phylo
325 PhyloParam { _phyloParam_version :: Text
326 , _phyloParam_software :: Software
327 , _phyloParam_config :: PhyloConfig
328 } deriving (Generic, Show, Eq)
330 instance ToSchema PhyloParam where
331 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
335 defaultPhyloParam :: PhyloParam
337 PhyloParam { _phyloParam_version = pack "v2.adaptative"
338 , _phyloParam_software = defaultSoftware
339 , _phyloParam_config = defaultConfig }
346 -- | Date : a simple Integer
349 -- | Ngrams : a contiguous sequence of n terms
352 -- Document : a piece of Text linked to a Date
353 -- date = computational date; date' = original string date yyyy-mm-dd
354 -- Export Database to Document
355 data Document = Document
356 { date :: Date -- datatype Date {unDate :: Int}
357 , date' :: Text -- show date
359 , weight :: Maybe Double
361 } deriving (Eq,Show,Generic,NFData)
369 -- | The Foundations of a Phylo created from a given TermList
370 data PhyloFoundations = PhyloFoundations
371 { _foundations_roots :: !(Vector Ngrams)
372 , _foundations_mapList :: TermList
373 } deriving (Generic, Show, Eq)
375 instance ToSchema PhyloFoundations where
376 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
380 data PhyloSources = PhyloSources
381 { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
383 instance ToSchema PhyloSources where
384 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
386 ---------------------------
387 -- | Coocurency Matrix | --
388 ---------------------------
391 -- | Cooc : a coocurency matrix between two ngrams
392 type Cooc = Map (Int,Int) Double
400 -- | Phylo datatype of a phylomemy
401 -- foundations : the foundations of the phylo
402 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
403 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
404 -- param : the parameters of the phylomemy (with the user's configuration)
405 -- periods : the temporal steps of a phylomemy
407 Phylo { _phylo_foundations :: PhyloFoundations
408 , _phylo_sources :: PhyloSources
409 , _phylo_timeCooc :: !(Map Date Cooc)
410 , _phylo_timeDocs :: !(Map Date Double)
411 , _phylo_termFreq :: !(Map Int Double)
412 , _phylo_lastTermFreq :: !(Map Int Double)
413 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
414 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
415 , _phylo_param :: PhyloParam
416 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
418 deriving (Generic, Show, Eq)
420 instance ToSchema Phylo where
421 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
424 -- | PhyloPeriodId : the id of a given period
425 type PhyloPeriodId = (Date,Date)
427 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
428 -- id: tuple (start date, end date) of the temporal step of the phylomemy
429 -- levels: levels of granularity
431 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
432 , _phylo_periodPeriod' :: (Text,Text)
433 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
434 } deriving (Generic, Show, Eq)
436 instance ToSchema PhyloPeriod where
437 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
441 -- | Level : a level of clustering
444 -- | PhyloLevelId : the id of a level of clustering in a given period
445 type PhyloLevelId = (PhyloPeriodId,Level)
447 -- | PhyloLevel : levels of phylomemy on a synchronic axis
448 -- Levels description:
449 -- Level 0: The foundations and the base of the phylo
450 -- Level 1: First level of clustering (the Fis)
451 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
453 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
454 , _phylo_levelPeriod' :: (Text,Text)
455 , _phylo_levelLevel :: Level
456 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
458 deriving (Generic, Show, Eq)
460 instance ToSchema PhyloLevel where
461 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
464 type PhyloGroupId = (PhyloLevelId, Int)
466 -- | BranchId : (a level, a sequence of branch index)
467 -- the sequence is a path of heritage from the most to the less specific branch
468 type PhyloBranchId = (Level, [Int])
470 -- | PhyloGroup : group of ngrams at each level and period
472 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
473 , _phylo_groupPeriod' :: (Text,Text)
474 , _phylo_groupLevel :: Level
475 , _phylo_groupIndex :: Int
476 , _phylo_groupLabel :: Text
477 , _phylo_groupSupport :: Support
478 , _phylo_groupWeight :: Maybe Double
479 , _phylo_groupSources :: [Int]
480 , _phylo_groupNgrams :: [Int]
481 , _phylo_groupCooc :: !(Cooc)
482 , _phylo_groupBranchId :: PhyloBranchId
483 , _phylo_groupMeta :: Map Text [Double]
484 , _phylo_groupLevelParents :: [Pointer]
485 , _phylo_groupLevelChilds :: [Pointer]
486 , _phylo_groupPeriodParents :: [Pointer]
487 , _phylo_groupPeriodChilds :: [Pointer]
488 , _phylo_groupAncestors :: [Pointer]
489 , _phylo_groupPeriodMemoryParents :: [Pointer']
490 , _phylo_groupPeriodMemoryChilds :: [Pointer']
492 deriving (Generic, Show, Eq, NFData)
494 instance ToSchema PhyloGroup where
495 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
498 -- | Weight : A generic mesure that can be associated with an Id
502 -- | Pointer : A weighted pointer to a given PhyloGroup
503 type Pointer = (PhyloGroupId, Weight)
504 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
505 type Pointer' = (PhyloGroupId, (Thr,Weight))
507 data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
508 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
511 ----------------------
512 -- | Phylo Clique | --
513 ----------------------
515 -- | Support : Number of Documents where a Clique occurs
518 data PhyloClique = PhyloClique
519 { _phyloClique_nodes :: [Int]
520 , _phyloClique_support :: Support
521 , _phyloClique_period :: (Date,Date)
522 , _phyloClique_weight :: Maybe Double
523 , _phyloClique_sources :: [Int]
524 } deriving (Generic,NFData,Show,Eq)
530 type DotId = TextLazy.Text
532 data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
534 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
535 instance ToSchema Filter where
536 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
539 data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
541 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
542 instance ToSchema Sort where
543 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
546 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
547 instance ToSchema Tagger where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
553 { _branch_labelTagger :: Tagger
554 , _branch_labelSize :: Int }
556 { _group_labelTagger :: Tagger
557 , _group_labelSize :: Int }
558 deriving (Show,Generic,Eq)
560 instance ToSchema PhyloLabel where
561 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
566 { _branch_id :: PhyloBranchId
567 , _branch_canonId :: [Int]
568 , _branch_seaLevel :: [Double]
569 , _branch_x :: Double
570 , _branch_y :: Double
571 , _branch_w :: Double
572 , _branch_t :: Double
573 , _branch_label :: Text
574 , _branch_meta :: Map Text [Double]
575 } deriving (Generic, Show, Eq)
577 instance ToSchema PhyloBranch where
578 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
582 { _export_groups :: [PhyloGroup]
583 , _export_branches :: [PhyloBranch]
584 } deriving (Generic, Show)
585 instance ToSchema PhyloExport where
586 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
593 makeLenses ''PhyloConfig
594 makeLenses ''PhyloSubConfig
595 makeLenses ''Proximity
596 makeLenses ''SeaElevation
599 makeLenses ''PhyloLabel
600 makeLenses ''TimeUnit
601 makeLenses ''PhyloFoundations
602 makeLenses ''PhyloClique
604 makeLenses ''PhyloPeriod
605 makeLenses ''PhyloLevel
606 makeLenses ''PhyloGroup
607 makeLenses ''PhyloParam
608 makeLenses ''PhyloExport
609 makeLenses ''PhyloBranch
611 ------------------------
612 -- | JSON instances | --
613 ------------------------
615 instance FromJSON Phylo
616 instance ToJSON Phylo
618 instance FromJSON PhyloSources
619 instance ToJSON PhyloSources
621 instance FromJSON PhyloParam
622 instance ToJSON PhyloParam
624 instance FromJSON PhyloPeriod
625 instance ToJSON PhyloPeriod
627 instance FromJSON PhyloLevel
628 instance ToJSON PhyloLevel
630 instance FromJSON Software
631 instance ToJSON Software
633 instance FromJSON PhyloGroup
634 instance ToJSON PhyloGroup
636 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)