2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
29 import Control.DeepSeq (NFData)
30 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Map.Strict (Map)
36 import Data.Text (Text, pack)
37 import Data.Vector (Vector)
39 import GHC.IO (FilePath)
40 import Gargantext.Core.Text.Context (TermList)
41 import Gargantext.Core.Utils.Prefix (unPrefix)
42 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
43 import Gargantext.Prelude
44 import qualified Data.Text.Lazy as TextLazy
51 Wos {_wos_limit :: Int}
52 | Csv {_csv_limit :: Int}
53 | Csv' {_csv'_limit :: Int}
54 deriving (Show,Generic,Eq)
56 instance ToSchema CorpusParser where
57 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
60 data ListParser = V3 | V4 deriving (Show,Generic,Eq)
61 instance ToSchema ListParser
66 { _cons_start :: Double
67 , _cons_gap :: Double }
69 { _adap_steps :: Double }
70 deriving (Show,Generic,Eq)
72 instance ToSchema SeaElevation
76 { _wlj_sensibility :: Double
77 , _wlj_minSharedNgrams :: Int }
79 { _wls_sensibility :: Double
80 , _wls_minSharedNgrams :: Int }
82 { _hmg_sensibility :: Double
83 , _hmg_minSharedNgrams :: Int}
85 deriving (Show,Generic,Eq)
87 instance ToSchema Proximity where
88 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
91 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
92 deriving (Show,Generic,Eq, ToSchema)
94 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
95 deriving (Show,Generic,Eq)
97 instance ToSchema SynchronyStrategy where
98 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
103 { _bpt_threshold :: Double
104 , _bpt_sensibility :: Double
105 , _bpt_scope :: SynchronyScope
106 , _bpt_strategy :: SynchronyStrategy }
107 | ByProximityDistribution
108 { _bpd_sensibility :: Double
109 , _bpd_strategy :: SynchronyStrategy }
110 deriving (Show,Generic,Eq)
112 instance ToSchema Synchrony where
113 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
119 { _epoch_period :: Int
121 , _epoch_matchingFrame :: Int }
123 { _year_period :: Int
125 , _year_matchingFrame :: Int }
127 { _month_period :: Int
129 , _month_matchingFrame :: Int }
131 { _week_period :: Int
133 , _week_matchingFrame :: Int }
137 , _day_matchingFrame :: Int }
138 deriving (Show,Generic,Eq)
140 instance ToSchema TimeUnit where
141 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
144 data MaxCliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
146 instance ToSchema MaxCliqueFilter where
147 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
153 { _fis_support :: Int
157 , _mcl_threshold :: Double
158 , _mcl_filter :: MaxCliqueFilter }
159 deriving (Show,Generic,Eq)
161 instance ToSchema Cluster where
162 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
166 Quality { _qua_granularity :: Double
167 , _qua_minBranch :: Int }
168 deriving (Show,Generic,Eq)
170 instance ToSchema Quality where
171 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
175 PhyloConfig { corpusPath :: FilePath
176 , listPath :: FilePath
177 , outputPath :: FilePath
178 , corpusParser :: CorpusParser
179 , listParser :: ListParser
182 , phyloProximity :: Proximity
183 , seaElevation :: SeaElevation
184 , findAncestors :: Bool
185 , phyloSynchrony :: Synchrony
186 , phyloQuality :: Quality
187 , timeUnit :: TimeUnit
189 , exportLabel :: [PhyloLabel]
191 , exportFilter :: [Filter]
192 } deriving (Show,Generic,Eq)
195 ------------------------------------------------------------------------
196 data PhyloSubConfig =
197 PhyloSubConfig { _sc_phyloProximity :: Double
198 , _sc_phyloSynchrony :: Double
199 , _sc_phyloQuality :: Double
200 , _sc_timeUnit :: TimeUnit
201 , _sc_clique :: Cluster
202 , _sc_exportFilter :: Double
204 deriving (Show,Generic,Eq)
207 subConfig2config :: PhyloSubConfig -> PhyloConfig
208 subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
209 , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
210 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
211 , timeUnit = _sc_timeUnit subConfig
212 , clique = _sc_clique subConfig
213 , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
216 ------------------------------------------------------------------------
217 defaultConfig :: PhyloConfig
219 PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
220 , listPath = "list.csv" -- useful for commandline only
221 , outputPath = "data/"
222 , corpusParser = Csv 100000
224 , phyloName = pack "Phylo Name"
226 , phyloProximity = WeightedLogJaccard 0.5 1
227 , seaElevation = Constante 0.1 0.1
228 , findAncestors = False
229 , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
230 , phyloQuality = Quality 0.5 1
231 , timeUnit = Year 3 1 5
232 , clique = MaxClique 5 0.0001 ByThreshold
233 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
234 , exportSort = ByHierarchy Desc
235 , exportFilter = [ByBranchSize 3]
239 instance ToSchema PhyloConfig
240 instance ToSchema PhyloSubConfig
242 instance FromJSON PhyloConfig
243 instance ToJSON PhyloConfig
245 instance FromJSON PhyloSubConfig
246 instance ToJSON PhyloSubConfig
248 instance FromJSON CorpusParser
249 instance ToJSON CorpusParser
251 instance FromJSON ListParser
252 instance ToJSON ListParser
254 instance FromJSON Proximity
255 instance ToJSON Proximity
257 instance FromJSON SeaElevation
258 instance ToJSON SeaElevation
260 instance FromJSON TimeUnit
261 instance ToJSON TimeUnit
263 instance FromJSON MaxCliqueFilter
264 instance ToJSON MaxCliqueFilter
266 instance FromJSON Cluster
267 instance ToJSON Cluster
269 instance FromJSON PhyloLabel
270 instance ToJSON PhyloLabel
272 instance FromJSON Tagger
273 instance ToJSON Tagger
275 instance FromJSON Sort
278 instance FromJSON Order
279 instance ToJSON Order
281 instance FromJSON Filter
282 instance ToJSON Filter
284 instance FromJSON SynchronyScope
285 instance ToJSON SynchronyScope
287 instance FromJSON SynchronyStrategy
288 instance ToJSON SynchronyStrategy
290 instance FromJSON Synchrony
291 instance ToJSON Synchrony
293 instance FromJSON Quality
294 instance ToJSON Quality
297 -- | Software parameters
299 Software { _software_name :: Text
300 , _software_version :: Text
301 } deriving (Generic, Show, Eq)
303 instance ToSchema Software where
304 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
308 defaultSoftware :: Software
310 Software { _software_name = pack "GarganText"
311 , _software_version = pack "v5" }
314 -- | Global parameters of a Phylo
316 PhyloParam { _phyloParam_version :: Text
317 , _phyloParam_software :: Software
318 , _phyloParam_config :: PhyloConfig
319 } deriving (Generic, Show, Eq)
321 instance ToSchema PhyloParam where
322 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
326 defaultPhyloParam :: PhyloParam
328 PhyloParam { _phyloParam_version = pack "v3"
329 , _phyloParam_software = defaultSoftware
330 , _phyloParam_config = defaultConfig }
337 -- | Date : a simple Integer
340 -- | DateStr : the string version of a Date
343 -- | Ngrams : a contiguous sequence of n terms
346 -- Document : a piece of Text linked to a Date
347 -- date = computational date; date' = original string date yyyy-mm-dd
348 -- Export Database to Document
349 data Document = Document
350 { date :: Date -- datatype Date {unDate :: Int}
351 , date' :: DateStr -- show date
353 , weight :: Maybe Double
355 } deriving (Eq,Show,Generic,NFData)
363 -- | The Foundations of a Phylo created from a given TermList
364 data PhyloFoundations = PhyloFoundations
365 { _foundations_roots :: !(Vector Ngrams)
366 , _foundations_mapList :: TermList
367 } deriving (Generic, Show, Eq)
369 instance ToSchema PhyloFoundations where
370 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
374 data PhyloSources = PhyloSources
375 { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
377 instance ToSchema PhyloSources where
378 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
380 ---------------------------
381 -- | Coocurency Matrix | --
382 ---------------------------
385 -- | Cooc : a coocurency matrix between two ngrams
386 type Cooc = Map (Int,Int) Double
393 -- | Period : a tuple of Dates
394 type Period = (Date,Date)
396 -- | PeriodStr : a tuple of DateStr
397 type PeriodStr = (DateStr,DateStr)
400 -- | Phylo datatype of a phylomemy
401 -- foundations : the foundations of the phylo
402 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
403 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
404 -- param : the parameters of the phylomemy (with the user's configuration)
405 -- periods : the temporal steps of a phylomemy
407 Phylo { _phylo_foundations :: PhyloFoundations
408 , _phylo_sources :: PhyloSources
409 , _phylo_timeCooc :: !(Map Date Cooc)
410 , _phylo_timeDocs :: !(Map Date Double)
411 , _phylo_termFreq :: !(Map Int Double)
412 , _phylo_lastTermFreq :: !(Map Int Double)
413 , _phylo_diaSimScan :: Set Double
414 , _phylo_param :: PhyloParam
415 , _phylo_periods :: Map Period PhyloPeriod
416 , _phylo_quality :: Double
418 deriving (Generic, Show, Eq)
420 instance ToSchema Phylo where
421 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
428 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
429 -- id: tuple (start date, end date) of the temporal step of the phylomemy
430 -- scales: scales of synchronic description
432 PhyloPeriod { _phylo_periodPeriod :: Period
433 , _phylo_periodPeriodStr :: PeriodStr
434 , _phylo_periodScales :: Map PhyloScaleId PhyloScale
435 } deriving (Generic, Show, Eq)
437 instance ToSchema PhyloPeriod where
438 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
444 -- | Scale : a scale of synchronic description
447 -- | PhyloScaleId : the id of a scale of synchronic description
448 type PhyloScaleId = (Period,Scale)
450 -- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
452 PhyloScale { _phylo_scalePeriod :: Period
453 , _phylo_scalePeriodStr :: PeriodStr
454 , _phylo_scaleScale :: Scale
455 , _phylo_scaleGroups :: Map PhyloGroupId PhyloGroup
457 deriving (Generic, Show, Eq)
459 instance ToSchema PhyloScale where
460 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
463 type PhyloGroupId = (PhyloScaleId, Int)
465 -- | BranchId : (a scale, a sequence of branch index)
466 -- the sequence is a path of heritage from the most to the less specific branch
467 type PhyloBranchId = (Scale, [Int])
469 -- | PhyloGroup : group of ngrams at each scale and period
471 PhyloGroup { _phylo_groupPeriod :: Period
472 , _phylo_groupPeriod' :: (Text,Text)
473 , _phylo_groupScale :: Scale
474 , _phylo_groupIndex :: Int
475 , _phylo_groupLabel :: Text
476 , _phylo_groupSupport :: Support
477 , _phylo_groupWeight :: Maybe Double
478 , _phylo_groupSources :: [Int]
479 , _phylo_groupNgrams :: [Int]
480 , _phylo_groupCooc :: !(Cooc)
481 , _phylo_groupBranchId :: PhyloBranchId
482 , _phylo_groupMeta :: Map Text [Double]
483 , _phylo_groupScaleParents :: [Pointer]
484 , _phylo_groupScaleChilds :: [Pointer]
485 , _phylo_groupPeriodParents :: [Pointer]
486 , _phylo_groupPeriodChilds :: [Pointer]
487 , _phylo_groupAncestors :: [Pointer]
488 , _phylo_groupPeriodMemoryParents :: [Pointer']
489 , _phylo_groupPeriodMemoryChilds :: [Pointer']
491 deriving (Generic, Show, Eq, NFData)
493 instance ToSchema PhyloGroup where
494 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
497 -- | Weight : A generic mesure that can be associated with an Id
501 -- | Pointer : A weighted pointer to a given PhyloGroup
502 type Pointer = (PhyloGroupId, Weight)
503 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
504 type Pointer' = (PhyloGroupId, (Thr,Weight))
506 data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
507 data PointerType = TemporalPointer | ScalePointer deriving (Generic, Show)
510 --------------------------
511 -- | Phylo Clustering | --
512 --------------------------
514 -- | Support : Number of Documents where a Cluster occurs
517 data Clustering = Clustering
518 { _clustering_roots :: [Int]
519 , _clustering_support :: Support
520 , _clustering_period :: Period
521 -- additional materials for visualization
522 , _clustering_visWeighting :: Maybe Double
523 , _clustering_visFiltering :: [Int]
524 } deriving (Generic,NFData,Show,Eq)
530 type DotId = TextLazy.Text
532 data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
534 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
535 instance ToSchema Filter where
536 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
539 data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
541 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
542 instance ToSchema Sort where
543 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
546 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
547 instance ToSchema Tagger where
548 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
553 { _branch_labelTagger :: Tagger
554 , _branch_labelSize :: Int }
556 { _group_labelTagger :: Tagger
557 , _group_labelSize :: Int }
558 deriving (Show,Generic,Eq)
560 instance ToSchema PhyloLabel where
561 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
566 { _branch_id :: PhyloBranchId
567 , _branch_canonId :: [Int]
568 , _branch_seaLevel :: [Double]
569 , _branch_x :: Double
570 , _branch_y :: Double
571 , _branch_w :: Double
572 , _branch_t :: Double
573 , _branch_label :: Text
574 , _branch_meta :: Map Text [Double]
575 } deriving (Generic, Show, Eq)
577 instance ToSchema PhyloBranch where
578 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
582 { _export_groups :: [PhyloGroup]
583 , _export_branches :: [PhyloBranch]
584 } deriving (Generic, Show)
585 instance ToSchema PhyloExport where
586 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
593 makeLenses ''PhyloConfig
594 makeLenses ''PhyloSubConfig
595 makeLenses ''Proximity
596 makeLenses ''SeaElevation
599 makeLenses ''PhyloLabel
600 makeLenses ''TimeUnit
601 makeLenses ''PhyloFoundations
602 makeLenses ''Clustering
604 makeLenses ''PhyloPeriod
605 makeLenses ''PhyloScale
606 makeLenses ''PhyloGroup
607 makeLenses ''PhyloParam
608 makeLenses ''PhyloExport
609 makeLenses ''PhyloBranch
611 ------------------------
612 -- | JSON instances | --
613 ------------------------
615 instance FromJSON Phylo
616 instance ToJSON Phylo
618 instance FromJSON PhyloSources
619 instance ToJSON PhyloSources
621 instance FromJSON PhyloParam
622 instance ToJSON PhyloParam
624 instance FromJSON PhyloPeriod
625 instance ToJSON PhyloPeriod
627 instance FromJSON PhyloScale
628 instance ToJSON PhyloScale
630 instance FromJSON Software
631 instance ToJSON Software
633 instance FromJSON PhyloGroup
634 instance ToJSON PhyloGroup
636 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)