2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
29 import Control.DeepSeq (NFData)
30 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
35 import Data.Text (Text, pack)
36 import Data.Vector (Vector)
38 import GHC.IO (FilePath)
39 import Gargantext.Core.Utils.Prefix (unPrefix)
40 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
41 import Gargantext.Prelude
42 import qualified Data.Text.Lazy as TextLazy
49 Wos {_wos_limit :: Int}
50 | Csv {_csv_limit :: Int}
51 | Csv' {_csv'_limit :: Int}
52 deriving (Show,Generic,Eq)
54 instance ToSchema CorpusParser where
55 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
58 data ListParser = V3 | V4 deriving (Show,Generic,Eq)
59 instance ToSchema ListParser
64 { _cons_start :: Double
65 , _cons_gap :: Double }
67 { _adap_steps :: Double }
69 { _evol_neighborhood :: Bool }
70 deriving (Show,Generic,Eq)
72 instance ToSchema SeaElevation
74 data PhyloSimilarity =
76 { _wlj_sensibility :: Double
77 , _wlj_minSharedNgrams :: Int }
79 { _wls_sensibility :: Double
80 , _wls_minSharedNgrams :: Int }
82 { _hmg_sensibility :: Double
83 , _hmg_minSharedNgrams :: Int}
85 deriving (Show,Generic,Eq)
87 instance ToSchema PhyloSimilarity where
88 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
91 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
92 deriving (Show,Generic,Eq, ToSchema)
94 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
95 deriving (Show,Generic,Eq)
97 instance ToSchema SynchronyStrategy where
98 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
103 { _bpt_threshold :: Double
104 , _bpt_sensibility :: Double
105 , _bpt_scope :: SynchronyScope
106 , _bpt_strategy :: SynchronyStrategy }
107 | ByProximityDistribution
108 { _bpd_sensibility :: Double
109 , _bpd_strategy :: SynchronyStrategy }
110 deriving (Show,Generic,Eq)
112 instance ToSchema Synchrony where
113 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
119 { _epoch_period :: Int
121 , _epoch_matchingFrame :: Int }
123 { _year_period :: Int
125 , _year_matchingFrame :: Int }
127 { _month_period :: Int
129 , _month_matchingFrame :: Int }
131 { _week_period :: Int
133 , _week_matchingFrame :: Int }
137 , _day_matchingFrame :: Int }
138 deriving (Show,Generic,Eq,NFData)
140 instance ToSchema TimeUnit where
141 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
144 data MaxCliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
146 instance ToSchema MaxCliqueFilter where
147 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
153 { _fis_support :: Int
157 , _mcl_threshold :: Double
158 , _mcl_filter :: MaxCliqueFilter }
159 deriving (Show,Generic,Eq)
161 instance ToSchema Cluster where
162 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
166 Quality { _qua_granularity :: Double
167 , _qua_minBranch :: Int }
168 deriving (Show,Generic,Eq)
170 instance ToSchema Quality where
171 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
175 PhyloConfig { corpusPath :: FilePath
176 , listPath :: FilePath
177 , outputPath :: FilePath
178 , corpusParser :: CorpusParser
179 , listParser :: ListParser
182 , similarity :: PhyloSimilarity
183 , seaElevation :: SeaElevation
184 , defaultMode :: Bool
185 , findAncestors :: Bool
186 , phyloSynchrony :: Synchrony
187 , phyloQuality :: Quality
188 , timeUnit :: TimeUnit
190 , exportLabel :: [PhyloLabel]
192 , exportFilter :: [Filter]
193 } deriving (Show,Generic,Eq)
196 --------------------------------
197 -- | SubConfig API & 1Click | --
198 --------------------------------
200 data PhyloSubConfigAPI =
201 PhyloSubConfigAPI { _sc_phyloProximity :: Double
202 , _sc_phyloSynchrony :: Double
203 , _sc_phyloQuality :: Double
204 , _sc_timeUnit :: TimeUnit
205 , _sc_clique :: Cluster
206 , _sc_exportFilter :: Double
207 } deriving (Show,Generic,Eq)
210 subConfigAPI2config :: PhyloSubConfigAPI -> PhyloConfig
211 subConfigAPI2config subConfig = defaultConfig
212 { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 2
213 , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
214 , phyloQuality = Quality (_sc_phyloQuality subConfig) 3
215 , timeUnit = _sc_timeUnit subConfig
216 , clique = _sc_clique subConfig
217 , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
220 --------------------------
221 -- | SubConfig 1Click | --
222 --------------------------
224 defaultConfig :: PhyloConfig
226 PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
227 , listPath = "list.csv" -- useful for commandline only
228 , outputPath = "data/"
229 , corpusParser = Csv 150000
231 , phyloName = pack "Phylo Name"
233 , similarity = WeightedLogJaccard 0.5 2
234 , seaElevation = Constante 0.1 0.1
235 , defaultMode = False
236 , findAncestors = True
237 , phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups
238 , phyloQuality = Quality 0.5 3
239 , timeUnit = Year 3 1 5
241 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
242 , exportSort = ByHierarchy Desc
243 , exportFilter = [ByBranchSize 3]
247 instance ToSchema PhyloConfig
248 instance ToSchema PhyloSubConfigAPI
250 instance FromJSON PhyloConfig
251 instance ToJSON PhyloConfig
253 instance FromJSON PhyloSubConfigAPI
254 instance ToJSON PhyloSubConfigAPI
256 instance FromJSON CorpusParser
257 instance ToJSON CorpusParser
259 instance FromJSON ListParser
260 instance ToJSON ListParser
262 instance FromJSON PhyloSimilarity
263 instance ToJSON PhyloSimilarity
265 instance FromJSON SeaElevation
266 instance ToJSON SeaElevation
268 instance FromJSON TimeUnit
269 instance ToJSON TimeUnit
271 instance FromJSON MaxCliqueFilter
272 instance ToJSON MaxCliqueFilter
274 instance FromJSON Cluster
275 instance ToJSON Cluster
277 instance FromJSON PhyloLabel
278 instance ToJSON PhyloLabel
280 instance FromJSON Tagger
281 instance ToJSON Tagger
283 instance FromJSON Sort
286 instance FromJSON Order
287 instance ToJSON Order
289 instance FromJSON Filter
290 instance ToJSON Filter
292 instance FromJSON SynchronyScope
293 instance ToJSON SynchronyScope
295 instance FromJSON SynchronyStrategy
296 instance ToJSON SynchronyStrategy
298 instance FromJSON Synchrony
299 instance ToJSON Synchrony
301 instance FromJSON Quality
302 instance ToJSON Quality
305 -- | Software parameters
307 Software { _software_name :: Text
308 , _software_version :: Text
309 } deriving (Generic, Show, Eq)
311 instance ToSchema Software where
312 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
316 defaultSoftware :: Software
318 Software { _software_name = pack "GarganText"
319 , _software_version = pack "v5" }
322 -- | Global parameters of a Phylo
324 PhyloParam { _phyloParam_version :: Text
325 , _phyloParam_software :: Software
326 , _phyloParam_config :: PhyloConfig
327 } deriving (Generic, Show, Eq)
329 instance ToSchema PhyloParam where
330 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
334 defaultPhyloParam :: PhyloParam
336 PhyloParam { _phyloParam_version = pack "v3"
337 , _phyloParam_software = defaultSoftware
338 , _phyloParam_config = defaultConfig }
345 -- | Date : a simple Integer
348 -- | DateStr : the string version of a Date
351 -- | Ngrams : a contiguous sequence of n terms
354 -- Document : a piece of Text linked to a Date
355 -- date = computational date; date' = original string date yyyy-mm-dd
356 -- Export Database to Document
357 data Document = Document
358 { date :: Date -- datatype Date {unDate :: Int}
359 , date' :: DateStr -- show date
361 , weight :: Maybe Double
363 , docTime :: TimeUnit
364 } deriving (Eq,Show,Generic,NFData)
372 -- | The Foundations of a Phylo created from a given TermList
373 data PhyloFoundations = PhyloFoundations
374 { _foundations_roots :: (Vector Ngrams)
375 , _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups
376 } deriving (Generic, Show, Eq)
378 data PhyloCounts = PhyloCounts
379 { coocByDate :: !(Map Date Cooc)
380 , docsByDate :: !(Map Date Double)
381 , rootsCountByDate :: !(Map Date (Map Int Double))
382 , rootsCount :: !(Map Int Double)
383 , rootsFreq :: !(Map Int Double)
384 , lastRootsFreq :: !(Map Int Double)
385 } deriving (Generic, Show, Eq)
387 data PhyloSources = PhyloSources
388 { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
390 instance ToSchema PhyloFoundations where
391 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
392 instance ToSchema PhyloCounts where
393 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
394 instance ToSchema PhyloSources where
395 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
397 ---------------------------
398 -- | Coocurency Matrix | --
399 ---------------------------
402 -- | Cooc : a coocurency matrix between two ngrams
403 type Cooc = Map (Int,Int) Double
410 -- | Period : a tuple of Dates
411 type Period = (Date,Date)
413 -- | PeriodStr : a tuple of DateStr
414 type PeriodStr = (DateStr,DateStr)
419 -- | Phylo datatype of a phylomemy
420 -- foundations : the foundations of the phylo
421 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
422 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
423 -- param : the parameters of the phylomemy (with the user's configuration)
424 -- periods : the temporal steps of a phylomemy
426 Phylo { _phylo_foundations :: PhyloFoundations
427 , _phylo_sources :: PhyloSources
428 , _phylo_counts :: PhyloCounts
429 , _phylo_seaLadder :: [Double]
430 , _phylo_param :: PhyloParam
431 , _phylo_periods :: Map Period PhyloPeriod
432 , _phylo_quality :: Double
433 , _phylo_level :: Double
435 deriving (Generic, Show, Eq)
437 instance ToSchema Phylo where
438 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
445 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
446 -- id: tuple (start date, end date) of the temporal step of the phylomemy
447 -- scales: scales of synchronic description
449 PhyloPeriod { _phylo_periodPeriod :: Period
450 , _phylo_periodPeriodStr :: PeriodStr
451 , _phylo_periodScales :: Map PhyloScaleId PhyloScale
452 } deriving (Generic, Show, Eq)
454 instance ToSchema PhyloPeriod where
455 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
461 -- | Scale : a scale of synchronic description
464 -- | PhyloScaleId : the id of a scale of synchronic description
465 type PhyloScaleId = (Period,Scale)
467 -- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
469 PhyloScale { _phylo_scalePeriod :: Period
470 , _phylo_scalePeriodStr :: PeriodStr
471 , _phylo_scaleScale :: Scale
472 , _phylo_scaleGroups :: Map PhyloGroupId PhyloGroup
474 deriving (Generic, Show, Eq)
476 instance ToSchema PhyloScale where
477 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
480 type PhyloGroupId = (PhyloScaleId, Int)
482 -- | BranchId : (a scale, a sequence of branch index)
483 -- the sequence is a path of heritage from the most to the less specific branch
484 type PhyloBranchId = (Scale, [Int])
486 -- | PhyloGroup : group of ngrams at each scale and period
488 PhyloGroup { _phylo_groupPeriod :: Period
489 , _phylo_groupPeriod' :: (Text,Text)
490 , _phylo_groupScale :: Scale
491 , _phylo_groupIndex :: Int
492 , _phylo_groupLabel :: Text
493 , _phylo_groupSupport :: Support
494 , _phylo_groupWeight :: Maybe Double
495 , _phylo_groupSources :: [Int]
496 , _phylo_groupNgrams :: [Int]
497 , _phylo_groupCooc :: !(Cooc)
498 , _phylo_groupDensity :: Double
499 , _phylo_groupBranchId :: PhyloBranchId
500 , _phylo_groupMeta :: Map Text [Double]
501 , _phylo_groupRootsCount :: Map Int Double
502 , _phylo_groupScaleParents :: [Pointer]
503 , _phylo_groupScaleChilds :: [Pointer]
504 , _phylo_groupPeriodParents :: [Pointer]
505 , _phylo_groupPeriodChilds :: [Pointer]
506 , _phylo_groupAncestors :: [Pointer]
507 , _phylo_groupPeriodMemoryParents :: [Pointer']
508 , _phylo_groupPeriodMemoryChilds :: [Pointer']
510 deriving (Generic, Show, Eq, NFData)
512 instance ToSchema PhyloGroup where
513 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
516 -- | Weight : A generic mesure that can be associated with an Id
520 -- | Pointer : A weighted pointer to a given PhyloGroup
521 type Pointer = (PhyloGroupId, Weight)
522 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
523 type Pointer' = (PhyloGroupId, (Thr,Weight))
525 data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
526 data PointerType = TemporalPointer | ScalePointer deriving (Generic, Show)
529 --------------------------
530 -- | Phylo Clustering | --
531 --------------------------
533 -- | Support : Number of Documents where a Cluster occurs
536 data Clustering = Clustering
537 { _clustering_roots :: [Int]
538 , _clustering_support :: Support
539 , _clustering_period :: Period
540 -- additional materials for visualization
541 , _clustering_visWeighting :: Maybe Double
542 , _clustering_visFiltering :: [Int]
543 } deriving (Generic,NFData,Show,Eq)
549 type DotId = TextLazy.Text
551 data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
553 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
554 instance ToSchema Filter where
555 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
558 data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
560 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
561 instance ToSchema Sort where
562 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
565 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
566 instance ToSchema Tagger where
567 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
572 { _branch_labelTagger :: Tagger
573 , _branch_labelSize :: Int }
575 { _group_labelTagger :: Tagger
576 , _group_labelSize :: Int }
577 deriving (Show,Generic,Eq)
579 instance ToSchema PhyloLabel where
580 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
585 { _branch_id :: PhyloBranchId
586 , _branch_canonId :: [Int]
587 , _branch_seaLevel :: [Double]
588 , _branch_x :: Double
589 , _branch_y :: Double
590 , _branch_w :: Double
591 , _branch_t :: Double
592 , _branch_label :: Text
593 , _branch_meta :: Map Text [Double]
594 } deriving (Generic, Show, Eq)
596 instance ToSchema PhyloBranch where
597 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
601 { _export_groups :: [PhyloGroup]
602 , _export_branches :: [PhyloBranch]
603 } deriving (Generic, Show)
604 instance ToSchema PhyloExport where
605 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
612 makeLenses ''PhyloConfig
613 makeLenses ''PhyloSubConfigAPI
614 makeLenses ''PhyloSimilarity
615 makeLenses ''SeaElevation
618 makeLenses ''PhyloLabel
619 makeLenses ''TimeUnit
620 makeLenses ''PhyloFoundations
621 makeLenses ''Clustering
623 makeLenses ''PhyloPeriod
624 makeLenses ''PhyloScale
625 makeLenses ''PhyloGroup
626 makeLenses ''PhyloParam
627 makeLenses ''PhyloExport
628 makeLenses ''PhyloBranch
630 ------------------------
631 -- | JSON instances | --
632 ------------------------
634 instance FromJSON Phylo
635 instance ToJSON Phylo
637 instance FromJSON PhyloSources
638 instance ToJSON PhyloSources
640 instance FromJSON PhyloParam
641 instance ToJSON PhyloParam
643 instance FromJSON PhyloCounts
644 instance ToJSON PhyloCounts
646 instance FromJSON PhyloPeriod
647 instance ToJSON PhyloPeriod
649 instance FromJSON PhyloScale
650 instance ToJSON PhyloScale
652 instance FromJSON Software
653 instance ToJSON Software
655 instance FromJSON PhyloGroup
656 instance ToJSON PhyloGroup
658 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)