2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
29 import Control.DeepSeq (NFData)
30 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
35 import Data.Text (Text, pack)
36 import Data.Vector (Vector)
38 import GHC.IO (FilePath)
39 import Gargantext.Core.Utils.Prefix (unPrefix)
40 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
41 import Gargantext.Prelude
42 import qualified Data.Text.Lazy as TextLazy
49 Wos {_wos_limit :: Int}
50 | Csv {_csv_limit :: Int}
51 | Csv' {_csv'_limit :: Int}
52 deriving (Show,Generic,Eq)
54 instance ToSchema CorpusParser where
55 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
58 data ListParser = V3 | V4 deriving (Show,Generic,Eq)
59 instance ToSchema ListParser
64 { _cons_start :: Double
65 , _cons_gap :: Double }
67 { _adap_steps :: Double }
69 { _evol_neighborhood :: Bool }
70 deriving (Show,Generic,Eq)
72 instance ToSchema SeaElevation
74 data PhyloSimilarity =
76 { _wlj_sensibility :: Double
77 , _wlj_minSharedNgrams :: Int }
79 { _wls_sensibility :: Double
80 , _wls_minSharedNgrams :: Int }
82 { _hmg_sensibility :: Double
83 , _hmg_minSharedNgrams :: Int}
85 deriving (Show,Generic,Eq)
87 instance ToSchema PhyloSimilarity where
88 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
91 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
92 deriving (Show,Generic,Eq, ToSchema)
94 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
95 deriving (Show,Generic,Eq)
97 instance ToSchema SynchronyStrategy where
98 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
103 { _bpt_threshold :: Double
104 , _bpt_sensibility :: Double
105 , _bpt_scope :: SynchronyScope
106 , _bpt_strategy :: SynchronyStrategy }
107 | ByProximityDistribution
108 { _bpd_sensibility :: Double
109 , _bpd_strategy :: SynchronyStrategy }
110 deriving (Show,Generic,Eq)
112 instance ToSchema Synchrony where
113 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
119 { _epoch_period :: Int
121 , _epoch_matchingFrame :: Int }
123 { _year_period :: Int
125 , _year_matchingFrame :: Int }
127 { _month_period :: Int
129 , _month_matchingFrame :: Int }
131 { _week_period :: Int
133 , _week_matchingFrame :: Int }
137 , _day_matchingFrame :: Int }
138 deriving (Show,Generic,Eq)
140 instance ToSchema TimeUnit where
141 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
144 data MaxCliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
146 instance ToSchema MaxCliqueFilter where
147 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
153 { _fis_support :: Int
157 , _mcl_threshold :: Double
158 , _mcl_filter :: MaxCliqueFilter }
159 deriving (Show,Generic,Eq)
161 instance ToSchema Cluster where
162 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
166 Quality { _qua_granularity :: Double
167 , _qua_minBranch :: Int }
168 deriving (Show,Generic,Eq)
170 instance ToSchema Quality where
171 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
175 PhyloConfig { corpusPath :: FilePath
176 , listPath :: FilePath
177 , outputPath :: FilePath
178 , corpusParser :: CorpusParser
179 , listParser :: ListParser
182 , similarity :: PhyloSimilarity
183 , seaElevation :: SeaElevation
184 , defaultMode :: Bool
185 , findAncestors :: Bool
186 , phyloSynchrony :: Synchrony
187 , phyloQuality :: Quality
188 , timeUnit :: TimeUnit
190 , exportLabel :: [PhyloLabel]
192 , exportFilter :: [Filter]
193 } deriving (Show,Generic,Eq)
196 ------------------------------------------------------------------------
197 data PhyloSubConfig =
198 PhyloSubConfig { _sc_phyloProximity :: Double
199 , _sc_phyloSynchrony :: Double
200 , _sc_phyloQuality :: Double
201 , _sc_timeUnit :: TimeUnit
202 , _sc_clique :: Cluster
203 , _sc_exportFilter :: Double
205 deriving (Show,Generic,Eq)
208 subConfig2config :: PhyloSubConfig -> PhyloConfig
209 subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
210 , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
211 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
212 , timeUnit = _sc_timeUnit subConfig
213 , clique = _sc_clique subConfig
214 , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
218 ------------------------------------------------------------------------
219 defaultConfig :: PhyloConfig
221 PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
222 , listPath = "list.csv" -- useful for commandline only
223 , outputPath = "data/"
224 , corpusParser = Csv 100000
226 , phyloName = pack "Phylo Name"
228 , similarity = WeightedLogJaccard 0.5 1
229 , seaElevation = Constante 0.1 0.1
231 , findAncestors = False
232 , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
233 , phyloQuality = Quality 0.5 1
234 , timeUnit = Year 3 1 5
235 , clique = MaxClique 5 0.0001 ByThreshold
236 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
237 , exportSort = ByHierarchy Desc
238 , exportFilter = [ByBranchSize 3]
242 instance ToSchema PhyloConfig
243 instance ToSchema PhyloSubConfig
245 instance FromJSON PhyloConfig
246 instance ToJSON PhyloConfig
248 instance FromJSON PhyloSubConfig
249 instance ToJSON PhyloSubConfig
251 instance FromJSON CorpusParser
252 instance ToJSON CorpusParser
254 instance FromJSON ListParser
255 instance ToJSON ListParser
257 instance FromJSON PhyloSimilarity
258 instance ToJSON PhyloSimilarity
260 instance FromJSON SeaElevation
261 instance ToJSON SeaElevation
263 instance FromJSON TimeUnit
264 instance ToJSON TimeUnit
266 instance FromJSON MaxCliqueFilter
267 instance ToJSON MaxCliqueFilter
269 instance FromJSON Cluster
270 instance ToJSON Cluster
272 instance FromJSON PhyloLabel
273 instance ToJSON PhyloLabel
275 instance FromJSON Tagger
276 instance ToJSON Tagger
278 instance FromJSON Sort
281 instance FromJSON Order
282 instance ToJSON Order
284 instance FromJSON Filter
285 instance ToJSON Filter
287 instance FromJSON SynchronyScope
288 instance ToJSON SynchronyScope
290 instance FromJSON SynchronyStrategy
291 instance ToJSON SynchronyStrategy
293 instance FromJSON Synchrony
294 instance ToJSON Synchrony
296 instance FromJSON Quality
297 instance ToJSON Quality
300 -- | Software parameters
302 Software { _software_name :: Text
303 , _software_version :: Text
304 } deriving (Generic, Show, Eq)
306 instance ToSchema Software where
307 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
311 defaultSoftware :: Software
313 Software { _software_name = pack "GarganText"
314 , _software_version = pack "v5" }
317 -- | Global parameters of a Phylo
319 PhyloParam { _phyloParam_version :: Text
320 , _phyloParam_software :: Software
321 , _phyloParam_config :: PhyloConfig
322 } deriving (Generic, Show, Eq)
324 instance ToSchema PhyloParam where
325 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
329 defaultPhyloParam :: PhyloParam
331 PhyloParam { _phyloParam_version = pack "v3"
332 , _phyloParam_software = defaultSoftware
333 , _phyloParam_config = defaultConfig }
340 -- | Date : a simple Integer
343 -- | DateStr : the string version of a Date
346 -- | Ngrams : a contiguous sequence of n terms
349 -- Document : a piece of Text linked to a Date
350 -- date = computational date; date' = original string date yyyy-mm-dd
351 -- Export Database to Document
352 data Document = Document
353 { date :: Date -- datatype Date {unDate :: Int}
354 , date' :: DateStr -- show date
356 , weight :: Maybe Double
358 } deriving (Eq,Show,Generic,NFData)
366 -- | The Foundations of a Phylo created from a given TermList
367 data PhyloFoundations = PhyloFoundations
368 { _foundations_roots :: (Vector Ngrams)
369 , _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups
370 } deriving (Generic, Show, Eq)
372 data PhyloCounts = PhyloCounts
373 { coocByDate :: !(Map Date Cooc)
374 , docsByDate :: !(Map Date Double)
375 , rootsCount :: !(Map Int Double)
376 , rootsFreq :: !(Map Int Double)
377 , lastRootsFreq :: !(Map Int Double)
378 } deriving (Generic, Show, Eq)
380 data PhyloSources = PhyloSources
381 { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
383 instance ToSchema PhyloFoundations where
384 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
385 instance ToSchema PhyloCounts where
386 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
387 instance ToSchema PhyloSources where
388 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
390 ---------------------------
391 -- | Coocurency Matrix | --
392 ---------------------------
395 -- | Cooc : a coocurency matrix between two ngrams
396 type Cooc = Map (Int,Int) Double
403 -- | Period : a tuple of Dates
404 type Period = (Date,Date)
406 -- | PeriodStr : a tuple of DateStr
407 type PeriodStr = (DateStr,DateStr)
412 -- | Phylo datatype of a phylomemy
413 -- foundations : the foundations of the phylo
414 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
415 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
416 -- param : the parameters of the phylomemy (with the user's configuration)
417 -- periods : the temporal steps of a phylomemy
419 Phylo { _phylo_foundations :: PhyloFoundations
420 , _phylo_sources :: PhyloSources
421 , _phylo_counts :: PhyloCounts
422 , _phylo_seaLadder :: [Double]
423 , _phylo_param :: PhyloParam
424 , _phylo_periods :: Map Period PhyloPeriod
425 , _phylo_quality :: Double
426 , _phylo_level :: Double
428 deriving (Generic, Show, Eq)
430 instance ToSchema Phylo where
431 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
438 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
439 -- id: tuple (start date, end date) of the temporal step of the phylomemy
440 -- scales: scales of synchronic description
442 PhyloPeriod { _phylo_periodPeriod :: Period
443 , _phylo_periodPeriodStr :: PeriodStr
444 , _phylo_periodScales :: Map PhyloScaleId PhyloScale
445 } deriving (Generic, Show, Eq)
447 instance ToSchema PhyloPeriod where
448 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
454 -- | Scale : a scale of synchronic description
457 -- | PhyloScaleId : the id of a scale of synchronic description
458 type PhyloScaleId = (Period,Scale)
460 -- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
462 PhyloScale { _phylo_scalePeriod :: Period
463 , _phylo_scalePeriodStr :: PeriodStr
464 , _phylo_scaleScale :: Scale
465 , _phylo_scaleGroups :: Map PhyloGroupId PhyloGroup
467 deriving (Generic, Show, Eq)
469 instance ToSchema PhyloScale where
470 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
473 type PhyloGroupId = (PhyloScaleId, Int)
475 -- | BranchId : (a scale, a sequence of branch index)
476 -- the sequence is a path of heritage from the most to the less specific branch
477 type PhyloBranchId = (Scale, [Int])
479 -- | PhyloGroup : group of ngrams at each scale and period
481 PhyloGroup { _phylo_groupPeriod :: Period
482 , _phylo_groupPeriod' :: (Text,Text)
483 , _phylo_groupScale :: Scale
484 , _phylo_groupIndex :: Int
485 , _phylo_groupLabel :: Text
486 , _phylo_groupSupport :: Support
487 , _phylo_groupWeight :: Maybe Double
488 , _phylo_groupSources :: [Int]
489 , _phylo_groupNgrams :: [Int]
490 , _phylo_groupCooc :: !(Cooc)
491 , _phylo_groupBranchId :: PhyloBranchId
492 , _phylo_groupMeta :: Map Text [Double]
493 , _phylo_groupScaleParents :: [Pointer]
494 , _phylo_groupScaleChilds :: [Pointer]
495 , _phylo_groupPeriodParents :: [Pointer]
496 , _phylo_groupPeriodChilds :: [Pointer]
497 , _phylo_groupAncestors :: [Pointer]
498 , _phylo_groupPeriodMemoryParents :: [Pointer']
499 , _phylo_groupPeriodMemoryChilds :: [Pointer']
501 deriving (Generic, Show, Eq, NFData)
503 instance ToSchema PhyloGroup where
504 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
507 -- | Weight : A generic mesure that can be associated with an Id
511 -- | Pointer : A weighted pointer to a given PhyloGroup
512 type Pointer = (PhyloGroupId, Weight)
513 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
514 type Pointer' = (PhyloGroupId, (Thr,Weight))
516 data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
517 data PointerType = TemporalPointer | ScalePointer deriving (Generic, Show)
520 --------------------------
521 -- | Phylo Clustering | --
522 --------------------------
524 -- | Support : Number of Documents where a Cluster occurs
527 data Clustering = Clustering
528 { _clustering_roots :: [Int]
529 , _clustering_support :: Support
530 , _clustering_period :: Period
531 -- additional materials for visualization
532 , _clustering_visWeighting :: Maybe Double
533 , _clustering_visFiltering :: [Int]
534 } deriving (Generic,NFData,Show,Eq)
540 type DotId = TextLazy.Text
542 data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
544 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
545 instance ToSchema Filter where
546 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
549 data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
551 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
552 instance ToSchema Sort where
553 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
556 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
557 instance ToSchema Tagger where
558 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
563 { _branch_labelTagger :: Tagger
564 , _branch_labelSize :: Int }
566 { _group_labelTagger :: Tagger
567 , _group_labelSize :: Int }
568 deriving (Show,Generic,Eq)
570 instance ToSchema PhyloLabel where
571 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
576 { _branch_id :: PhyloBranchId
577 , _branch_canonId :: [Int]
578 , _branch_seaLevel :: [Double]
579 , _branch_x :: Double
580 , _branch_y :: Double
581 , _branch_w :: Double
582 , _branch_t :: Double
583 , _branch_label :: Text
584 , _branch_meta :: Map Text [Double]
585 } deriving (Generic, Show, Eq)
587 instance ToSchema PhyloBranch where
588 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
592 { _export_groups :: [PhyloGroup]
593 , _export_branches :: [PhyloBranch]
594 } deriving (Generic, Show)
595 instance ToSchema PhyloExport where
596 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
603 makeLenses ''PhyloConfig
604 makeLenses ''PhyloSubConfig
605 makeLenses ''PhyloSimilarity
606 makeLenses ''SeaElevation
609 makeLenses ''PhyloLabel
610 makeLenses ''TimeUnit
611 makeLenses ''PhyloFoundations
612 makeLenses ''Clustering
614 makeLenses ''PhyloPeriod
615 makeLenses ''PhyloScale
616 makeLenses ''PhyloGroup
617 makeLenses ''PhyloParam
618 makeLenses ''PhyloExport
619 makeLenses ''PhyloBranch
621 ------------------------
622 -- | JSON instances | --
623 ------------------------
625 instance FromJSON Phylo
626 instance ToJSON Phylo
628 instance FromJSON PhyloSources
629 instance ToJSON PhyloSources
631 instance FromJSON PhyloParam
632 instance ToJSON PhyloParam
634 instance FromJSON PhyloCounts
635 instance ToJSON PhyloCounts
637 instance FromJSON PhyloPeriod
638 instance ToJSON PhyloPeriod
640 instance FromJSON PhyloScale
641 instance ToJSON PhyloScale
643 instance FromJSON Software
644 instance ToJSON Software
646 instance FromJSON PhyloGroup
647 instance ToJSON PhyloGroup
649 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)