2 Module : Gargantext.Core.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo, which is a synonym of Phylomemy (the only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
29 import Control.DeepSeq (NFData)
30 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
35 import Data.Text (Text, pack)
36 import Data.Vector (Vector)
38 import GHC.IO (FilePath)
39 import Gargantext.Core.Text.Context (TermList)
40 import Gargantext.Core.Utils.Prefix (unPrefix)
41 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
42 import Gargantext.Prelude
43 import qualified Data.Text.Lazy as TextLazy
50 Wos {_wos_limit :: Int}
51 | Csv {_csv_limit :: Int}
52 | Csv' {_csv'_limit :: Int}
53 deriving (Show,Generic,Eq)
-- | Swagger schema for 'CorpusParser', built generically from the schema
-- options returned by @unPrefixSwagger "_"@.
instance ToSchema CorpusParser where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_"
-- | The available list parsers: 'V3' and 'V4'.
data ListParser
  = V3
  | V4
  deriving (Show, Generic, Eq)

-- | Swagger schema taken from the class's default implementation.
instance ToSchema ListParser
65 { _cons_start :: Double
66 , _cons_step :: Double }
68 { _adap_granularity :: Double }
69 deriving (Show,Generic,Eq)
-- | Swagger schema for 'SeaElevation'; the empty instance body relies on the
-- class's default implementation.
71 instance ToSchema SeaElevation
75 { _wlj_sensibility :: Double
76 , _wlj_minSharedNgrams :: Int }
78 { _wls_sensibility :: Double
79 , _wls_minSharedNgrams :: Int }
81 { _hmg_sensibility :: Double
82 , _hmg_minSharedNgrams :: Int}
84 deriving (Show,Generic,Eq)
-- | Swagger schema for 'Proximity', built generically from the schema
-- options returned by @unPrefixSwagger ""@ (an empty prefix argument).
instance ToSchema Proximity where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger ""
-- | Scope selector stored in the '_bpt_scope' field of 'Synchrony'.
data SynchronyScope
  = SingleBranch
  | SiblingBranches
  | AllBranches
  deriving (Show, Generic, Eq, ToSchema)
-- | Strategy selector stored in the '_bpt_strategy' and '_bpd_strategy'
-- fields of 'Synchrony'.
data SynchronyStrategy
  = MergeRegularGroups
  | MergeAllGroups
  deriving (Show, Generic, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger ""@.
instance ToSchema SynchronyStrategy where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger ""
102 { _bpt_threshold :: Double
103 , _bpt_sensibility :: Double
104 , _bpt_scope :: SynchronyScope
105 , _bpt_strategy :: SynchronyStrategy }
106 | ByProximityDistribution
107 { _bpd_sensibility :: Double
108 , _bpd_strategy :: SynchronyStrategy }
109 deriving (Show,Generic,Eq)
-- | Swagger schema for 'Synchrony', built generically from the schema
-- options returned by @unPrefixSwagger "_"@.
instance ToSchema Synchrony where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_"
118 { _epoch_period :: Int
120 , _epoch_matchingFrame :: Int }
122 { _year_period :: Int
124 , _year_matchingFrame :: Int }
126 { _month_period :: Int
128 , _month_matchingFrame :: Int }
130 { _week_period :: Int
132 , _week_matchingFrame :: Int }
136 , _day_matchingFrame :: Int }
137 deriving (Show,Generic,Eq)
-- | Swagger schema for 'TimeUnit', built generically from the schema
-- options returned by @unPrefixSwagger ""@.
instance ToSchema TimeUnit where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger ""
-- | Filter selector stored in the '_mcl_filter' field of 'Cluster'.
data MaxCliqueFilter
  = ByThreshold
  | ByNeighbours
  deriving (Show, Generic, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger ""@.
instance ToSchema MaxCliqueFilter where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger ""
152 { _fis_support :: Int
156 , _mcl_threshold :: Double
157 , _mcl_filter :: MaxCliqueFilter }
158 deriving (Show,Generic,Eq)
-- | Swagger schema for 'Cluster', built generically from the schema
-- options returned by @unPrefixSwagger ""@.
instance ToSchema Cluster where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger ""
165 Quality { _qua_granularity :: Double
166 , _qua_minBranch :: Int }
167 deriving (Show,Generic,Eq)
-- | Swagger schema for 'Quality', built generically from the schema
-- options returned by @unPrefixSwagger "_qua_"@.
--
-- NOTE(review): the 'FromJSON' / 'ToJSON' instances for 'Quality' in this
-- file are declared without options, so JSON keys presumably keep the full
-- @_qua_@ field names -- confirm that the schema and the JSON encoding agree.
instance ToSchema Quality where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger "_qua_"
174 PhyloConfig { corpusPath :: FilePath
175 , listPath :: FilePath
176 , outputPath :: FilePath
177 , corpusParser :: CorpusParser
178 , listParser :: ListParser
181 , phyloProximity :: Proximity
182 , seaElevation :: SeaElevation
183 , findAncestors :: Bool
184 , phyloSynchrony :: Synchrony
185 , phyloQuality :: Quality
186 , timeUnit :: TimeUnit
188 , exportLabel :: [PhyloLabel]
190 , exportFilter :: [Filter]
191 } deriving (Show,Generic,Eq)
194 ------------------------------------------------------------------------
195 data PhyloSubConfig =
196 PhyloSubConfig { _sc_phyloProximity :: Double
197 , _sc_phyloSynchrony :: Double
198 , _sc_phyloQuality :: Double
199 , _sc_timeUnit :: TimeUnit
200 , _sc_clique :: Cluster
201 , _sc_exportFilter :: Double
203 deriving (Show,Generic,Eq)
206 subConfig2config :: PhyloSubConfig -> PhyloConfig
207 subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
208 , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
209 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
210 , timeUnit = _sc_timeUnit subConfig
211 , clique = _sc_clique subConfig
212 , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
215 ------------------------------------------------------------------------
216 defaultConfig :: PhyloConfig
218 PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
219 , listPath = "list.csv" -- useful for commandline only
220 , outputPath = "data/"
221 , corpusParser = Csv 100000
223 , phyloName = pack "Phylo Name"
225 , phyloProximity = WeightedLogJaccard 0.5 1
226 , seaElevation = Constante 0.1 0.1
227 , findAncestors = False
228 , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
229 , phyloQuality = Quality 0.5 1
230 , timeUnit = Year 3 1 5
231 , clique = MaxClique 5 0.0001 ByThreshold
232 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
233 , exportSort = ByHierarchy Desc
234 , exportFilter = [ByBranchSize 3]
-- Swagger schemas for the two configuration records. Both instance bodies
-- are empty, so they rely on the default implementation of 'ToSchema'.
238 instance ToSchema PhyloConfig
239 instance ToSchema PhyloSubConfig
-- JSON instances for the configuration types. Every instance body is empty,
-- so encoding and decoding come from the default methods of 'FromJSON' and
-- 'ToJSON' (Generic-based in aeson), with no custom options.
-- NOTE(review): with default options the JSON keys presumably keep the record
-- field names verbatim (leading underscores and prefixes included), whereas
-- several 'ToSchema' instances in this file pass a prefix to
-- 'unPrefixSwagger' -- confirm that schema property names and JSON keys agree.
241 instance FromJSON PhyloConfig
242 instance ToJSON PhyloConfig
244 instance FromJSON PhyloSubConfig
245 instance ToJSON PhyloSubConfig
247 instance FromJSON CorpusParser
248 instance ToJSON CorpusParser
250 instance FromJSON ListParser
251 instance ToJSON ListParser
253 instance FromJSON Proximity
254 instance ToJSON Proximity
256 instance FromJSON SeaElevation
257 instance ToJSON SeaElevation
259 instance FromJSON TimeUnit
260 instance ToJSON TimeUnit
262 instance FromJSON MaxCliqueFilter
263 instance ToJSON MaxCliqueFilter
265 instance FromJSON Cluster
266 instance ToJSON Cluster
268 instance FromJSON PhyloLabel
269 instance ToJSON PhyloLabel
271 instance FromJSON Tagger
272 instance ToJSON Tagger
274 instance FromJSON Sort
277 instance FromJSON Order
278 instance ToJSON Order
280 instance FromJSON Filter
281 instance ToJSON Filter
283 instance FromJSON SynchronyScope
284 instance ToJSON SynchronyScope
286 instance FromJSON SynchronyStrategy
287 instance ToJSON SynchronyStrategy
289 instance FromJSON Synchrony
290 instance ToJSON Synchrony
292 instance FromJSON Quality
293 instance ToJSON Quality
296 -- | Software parameters
298 Software { _software_name :: Text
299 , _software_version :: Text
300 } deriving (Generic, Show, Eq)
-- | Swagger schema for 'Software', built generically from the schema
-- options returned by @unPrefixSwagger "_software_"@.
instance ToSchema Software where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_software_"
307 defaultSoftware :: Software
309 Software { _software_name = pack "Gargantext"
310 , _software_version = pack "v4" }
313 -- | Global parameters of a Phylo
315 PhyloParam { _phyloParam_version :: Text
316 , _phyloParam_software :: Software
317 , _phyloParam_config :: PhyloConfig
318 } deriving (Generic, Show, Eq)
-- | Swagger schema for 'PhyloParam', built generically from the schema
-- options returned by @unPrefixSwagger "_phyloParam_"@.
instance ToSchema PhyloParam where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger "_phyloParam_"
325 defaultPhyloParam :: PhyloParam
327 PhyloParam { _phyloParam_version = pack "v2.adaptative"
328 , _phyloParam_software = defaultSoftware
329 , _phyloParam_config = defaultConfig }
336 -- | Date : a simple Integer
339 -- | DateStr : the string version of a Date
342 -- | Ngrams : a contiguous sequence of n terms
345 -- Document : a piece of Text linked to a Date
346 -- date = computational date; date' = original string date yyyy-mm-dd
347 -- Export Database to Document
348 data Document = Document
349 { date :: Date -- datatype Date {unDate :: Int}
350 , date' :: DateStr -- show date
352 , weight :: Maybe Double
354 } deriving (Eq,Show,Generic,NFData)
-- | Foundations of a Phylo, created from a given 'TermList'.
data PhyloFoundations = PhyloFoundations
  { _foundations_roots   :: !(Vector Ngrams)  -- ^ strict vector of root ngrams
  , _foundations_mapList :: TermList          -- ^ the term list (lazy field)
  }
  deriving (Generic, Show, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger "_foundations_"@.
instance ToSchema PhyloFoundations where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_foundations_"
-- | Sources of a Phylo, held as a strict vector of 'Text'.
data PhyloSources = PhyloSources
  { _sources :: !(Vector Text)
  }
  deriving (Generic, Show, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger "_"@.
instance ToSchema PhyloSources where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger "_"
379 ------------------------------
380 -- | Co-occurrence Matrix | --
381 ------------------------------
384 -- | Cooc : a co-occurrence matrix between two ngrams, stored as a Map from a pair of Ints to a Double
-- (the Ints are presumably ngram indices -- TODO confirm)
385 type Cooc = Map (Int,Int) Double
392 -- | Period : a pair of Dates (the PhyloPeriod notes in this file describe it as start date, end date)
393 type Period = (Date,Date)
395 -- | PeriodStr : a pair of DateStr, i.e. the string version of a Period
396 type PeriodStr = (DateStr,DateStr)
399 -- | Phylo datatype of a phylomemy
400 -- foundations : the foundations of the phylo
401 -- timeCooc : a Map of co-occurrences by minimal unit of time (ex: by year)
402 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
403 -- param : the parameters of the phylomemy (with the user's configuration)
404 -- periods : the temporal steps of a phylomemy
406 Phylo { _phylo_foundations :: PhyloFoundations
407 , _phylo_sources :: PhyloSources
408 , _phylo_timeCooc :: !(Map Date Cooc)
409 , _phylo_timeDocs :: !(Map Date Double)
410 , _phylo_termFreq :: !(Map Int Double)
411 , _phylo_lastTermFreq :: !(Map Int Double)
412 , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
413 , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
414 , _phylo_param :: PhyloParam
415 , _phylo_periods :: Map Period PhyloPeriod
416 , _phylo_quality :: Double
418 deriving (Generic, Show, Eq)
-- | Swagger schema for 'Phylo', built generically from the schema options
-- returned by @unPrefixSwagger "_phylo_"@.
instance ToSchema Phylo where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_phylo_"
428 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
429 -- id: tuple (start date, end date) of the temporal step of the phylomemy
430 -- scales: scales of synchronic description
432 PhyloPeriod { _phylo_periodPeriod :: Period
433 , _phylo_periodPeriodStr :: PeriodStr
434 , _phylo_periodScales :: Map PhyloScaleId PhyloScale
435 } deriving (Generic, Show, Eq)
-- | Swagger schema for 'PhyloPeriod', built generically from the schema
-- options returned by @unPrefixSwagger "_phylo_"@.
instance ToSchema PhyloPeriod where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger "_phylo_"
444 -- | Scale : a scale of synchronic description
447 -- | PhyloScaleId : the id of a scale of synchronic description, i.e. a Period paired with a Scale
448 type PhyloScaleId = (Period,Scale)
450 -- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
452 PhyloScale { _phylo_scalePeriod :: Period
453 , _phylo_scalePeriodStr :: PeriodStr
454 , _phylo_scaleScale :: Scale
455 , _phylo_scaleGroups :: Map PhyloGroupId PhyloGroup
457 deriving (Generic, Show, Eq)
-- | Swagger schema for 'PhyloScale', built generically from the schema
-- options returned by @unPrefixSwagger "_phylo_"@.
instance ToSchema PhyloScale where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_phylo_"
-- | PhyloGroupId : a PhyloScaleId paired with an Int
-- (the Int is presumably the group's index within that scale -- TODO confirm)
463 type PhyloGroupId = (PhyloScaleId, Int)
465 -- | PhyloBranchId : (a scale, a sequence of branch indexes)
466 -- the sequence is a path of heritage from the most to the least specific branch
467 type PhyloBranchId = (Scale, [Int])
469 -- | PhyloGroup : group of ngrams at each scale and period
471 PhyloGroup { _phylo_groupPeriod :: Period
472 , _phylo_groupPeriod' :: (Text,Text)
473 , _phylo_groupScale :: Scale
474 , _phylo_groupIndex :: Int
475 , _phylo_groupLabel :: Text
476 , _phylo_groupSupport :: Support
477 , _phylo_groupWeight :: Maybe Double
478 , _phylo_groupSources :: [Int]
479 , _phylo_groupNgrams :: [Int]
480 , _phylo_groupCooc :: !(Cooc)
481 , _phylo_groupBranchId :: PhyloBranchId
482 , _phylo_groupMeta :: Map Text [Double]
483 , _phylo_groupScaleParents :: [Pointer]
484 , _phylo_groupScaleChilds :: [Pointer]
485 , _phylo_groupPeriodParents :: [Pointer]
486 , _phylo_groupPeriodChilds :: [Pointer]
487 , _phylo_groupAncestors :: [Pointer]
488 , _phylo_groupPeriodMemoryParents :: [Pointer']
489 , _phylo_groupPeriodMemoryChilds :: [Pointer']
491 deriving (Generic, Show, Eq, NFData)
-- | Swagger schema for 'PhyloGroup', built generically from the schema
-- options returned by @unPrefixSwagger "_phylo_"@.
instance ToSchema PhyloGroup where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger "_phylo_"
497 -- | Weight : A generic measure that can be associated with an Id
501 -- | Pointer : A weighted pointer to a given PhyloGroup, i.e. the target's PhyloGroupId paired with a Weight
502 type Pointer = (PhyloGroupId, Weight)
503 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold, i.e. the target's PhyloGroupId paired with (Thr, Weight)
504 type Pointer' = (PhyloGroupId, (Thr,Weight))
-- | Direction tag with four cases: towards parents or children, each with a
-- "memory" variant.
data Filiation
  = ToParents
  | ToChilds
  | ToParentsMemory
  | ToChildsMemory
  deriving (Generic, Show)
-- | Kind of pointer: temporal or scale.
data PointerType
  = TemporalPointer
  | ScalePointer
  deriving (Generic, Show)
510 --------------------------
511 -- | Phylo Clustering | --
512 --------------------------
514 -- | Support : Number of Documents where a Cluster occurs
-- | One clustering result: its roots, its support and its period, plus
-- additional materials used only for visualization.
data Clustering = Clustering
  { _clustering_roots        :: [Int]         -- ^ presumably ngram indices (TODO confirm)
  , _clustering_support      :: Support       -- ^ support of the cluster
  , _clustering_period       :: Period        -- ^ period of the cluster
  , _clustering_visWeighting :: Maybe Double  -- ^ visualization: optional weighting
  , _clustering_visFiltering :: [Int]         -- ^ visualization: filtering data
  }
  deriving (Generic, NFData, Show, Eq)
-- | DotId : an identifier represented as lazy Text ("Data.Text.Lazy", imported here as TextLazy)
-- (presumably the id of a node in the exported dot graph -- TODO confirm)
530 type DotId = TextLazy.Text
-- | Kind of edge, named after the two endpoints it connects.
data EdgeType
  = GroupToGroup
  | GroupToGroupMemory
  | BranchToGroup
  | BranchToBranch
  | GroupToAncestor
  | PeriodToPeriod
  deriving (Show, Generic, Eq)
-- | Export filter; the only constructor, 'ByBranchSize', carries a size as a
-- 'Double'.
data Filter = ByBranchSize
  { _branch_size :: Double
  }
  deriving (Show, Generic, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger ""@.
instance ToSchema Filter where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger ""
-- | Sort direction: ascending or descending.
data Order
  = Asc
  | Desc
  deriving (Show, Generic, Eq, ToSchema)
-- | Sort criterion; both constructors share the '_sort_order' field.
data Sort
  = ByBirthDate { _sort_order :: Order }
  | ByHierarchy { _sort_order :: Order }
  deriving (Show, Generic, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger "_sort_"@.
instance ToSchema Sort where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_sort_"
-- | Tagger selector stored in the label-tagger fields of 'PhyloLabel'.
data Tagger
  = MostInclusive
  | MostEmergentInclusive
  | MostEmergentTfIdf
  deriving (Show, Generic, Eq)

-- | Swagger schema built generically with the options returned by
-- @unPrefixSwagger ""@.
instance ToSchema Tagger where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger ""
553 { _branch_labelTagger :: Tagger
554 , _branch_labelSize :: Int }
556 { _group_labelTagger :: Tagger
557 , _group_labelSize :: Int }
558 deriving (Show,Generic,Eq)
-- | Swagger schema for 'PhyloLabel', built generically from the schema
-- options returned by @unPrefixSwagger "_"@.
instance ToSchema PhyloLabel where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_"
566 { _branch_id :: PhyloBranchId
567 , _branch_canonId :: [Int]
568 , _branch_seaLevel :: [Double]
569 , _branch_x :: Double
570 , _branch_y :: Double
571 , _branch_w :: Double
572 , _branch_t :: Double
573 , _branch_label :: Text
574 , _branch_meta :: Map Text [Double]
575 } deriving (Generic, Show, Eq)
-- | Swagger schema for 'PhyloBranch', built generically from the schema
-- options returned by @unPrefixSwagger "_branch_"@.
instance ToSchema PhyloBranch where
  declareNamedSchema =
    genericDeclareNamedSchema $ unPrefixSwagger "_branch_"
582 { _export_groups :: [PhyloGroup]
583 , _export_branches :: [PhyloBranch]
584 } deriving (Generic, Show)
-- | Swagger schema for 'PhyloExport', built generically from the schema
-- options returned by @unPrefixSwagger "_export_"@.
instance ToSchema PhyloExport where
  declareNamedSchema = genericDeclareNamedSchema schemaOptions
    where
      schemaOptions = unPrefixSwagger "_export_"
-- Template Haskell splices that generate lenses for the record types below.
-- NOTE(review): 'makeLenses' with its default rules presumably only creates
-- lenses for fields whose names start with an underscore; the 'PhyloConfig'
-- fields visible in this file (corpusPath, listPath, ...) have none, so that
-- splice may generate nothing for them -- confirm against the lens docs.
593 makeLenses ''PhyloConfig
594 makeLenses ''PhyloSubConfig
595 makeLenses ''Proximity
596 makeLenses ''SeaElevation
599 makeLenses ''PhyloLabel
600 makeLenses ''TimeUnit
601 makeLenses ''PhyloFoundations
602 makeLenses ''Clustering
604 makeLenses ''PhyloPeriod
605 makeLenses ''PhyloScale
606 makeLenses ''PhyloGroup
607 makeLenses ''PhyloParam
608 makeLenses ''PhyloExport
609 makeLenses ''PhyloBranch
611 ------------------------
612 -- | JSON instances | --
613 ------------------------
-- JSON instances for the phylo data types. The instances with empty bodies
-- rely on the default methods of 'FromJSON' / 'ToJSON' (Generic-based in
-- aeson), so no custom options are applied to them.
615 instance FromJSON Phylo
616 instance ToJSON Phylo
618 instance FromJSON PhyloSources
619 instance ToJSON PhyloSources
621 instance FromJSON PhyloParam
622 instance ToJSON PhyloParam
624 instance FromJSON PhyloPeriod
625 instance ToJSON PhyloPeriod
627 instance FromJSON PhyloScale
628 instance ToJSON PhyloScale
630 instance FromJSON Software
631 instance ToJSON Software
633 instance FromJSON PhyloGroup
634 instance ToJSON PhyloGroup
-- 'PhyloFoundations' is the exception: its instances are generated by
-- Template Haskell with the options returned by @unPrefix "_foundations_"@,
-- the same prefix string its 'ToSchema' instance passes to 'unPrefixSwagger'.
636 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)