2 Module : Gargantext.Core.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveAnyClass #-}
25 {-# LANGUAGE TemplateHaskell #-}
27 module Gargantext.Core.Viz.Phylo where
29 import Control.DeepSeq (NFData)
30 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
35 import Data.Text (Text, pack)
36 import Data.Vector (Vector)
38 import GHC.IO (FilePath)
39 import Gargantext.Core.Utils.Prefix (unPrefix)
40 import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
41 import Gargantext.Prelude
42 import qualified Data.Text.Lazy as TextLazy
49 Wos {_wos_limit :: Int}
50 | Csv {_csv_limit :: Int}
51 | Csv' {_csv'_limit :: Int}
52 deriving (Show,Generic,Eq)
54 instance ToSchema CorpusParser where
55 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
58 data ListParser = V3 | V4 deriving (Show,Generic,Eq)
59 instance ToSchema ListParser
64 { _cons_start :: Double
65 , _cons_gap :: Double }
67 { _adap_steps :: Double }
69 { _evol_neighborhood :: Bool }
70 deriving (Show,Generic,Eq)
72 instance ToSchema SeaElevation
74 data PhyloSimilarity =
76 { _wlj_sensibility :: Double
77 , _wlj_minSharedNgrams :: Int }
79 { _wls_sensibility :: Double
80 , _wls_minSharedNgrams :: Int }
82 { _hmg_sensibility :: Double
83 , _hmg_minSharedNgrams :: Int}
85 deriving (Show,Generic,Eq)
87 instance ToSchema PhyloSimilarity where
88 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
91 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
92 deriving (Show,Generic,Eq, ToSchema)
94 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
95 deriving (Show,Generic,Eq)
97 instance ToSchema SynchronyStrategy where
98 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
103 { _bpt_threshold :: Double
104 , _bpt_sensibility :: Double
105 , _bpt_scope :: SynchronyScope
106 , _bpt_strategy :: SynchronyStrategy }
107 | ByProximityDistribution
108 { _bpd_sensibility :: Double
109 , _bpd_strategy :: SynchronyStrategy }
110 deriving (Show,Generic,Eq)
112 instance ToSchema Synchrony where
113 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
119 { _epoch_period :: Int
121 , _epoch_matchingFrame :: Int }
123 { _year_period :: Int
125 , _year_matchingFrame :: Int }
127 { _month_period :: Int
129 , _month_matchingFrame :: Int }
131 { _week_period :: Int
133 , _week_matchingFrame :: Int }
137 , _day_matchingFrame :: Int }
138 deriving (Show,Generic,Eq,NFData)
140 instance ToSchema TimeUnit where
141 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
144 data MaxCliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
146 instance ToSchema MaxCliqueFilter where
147 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
153 { _fis_support :: Int
157 , _mcl_threshold :: Double
158 , _mcl_filter :: MaxCliqueFilter }
159 deriving (Show,Generic,Eq)
161 instance ToSchema Cluster where
162 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
166 Quality { _qua_granularity :: Double
167 , _qua_minBranch :: Int }
168 deriving (Show,Generic,Eq)
170 instance ToSchema Quality where
171 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
175 PhyloConfig { corpusPath :: FilePath
176 , listPath :: FilePath
177 , outputPath :: FilePath
178 , corpusParser :: CorpusParser
179 , listParser :: ListParser
182 , similarity :: PhyloSimilarity
183 , seaElevation :: SeaElevation
184 , defaultMode :: Bool
185 , findAncestors :: Bool
186 , phyloSynchrony :: Synchrony
187 , phyloQuality :: Quality
188 , timeUnit :: TimeUnit
190 , exportLabel :: [PhyloLabel]
192 , exportFilter :: [Filter]
193 } deriving (Show,Generic,Eq)
196 --------------------------------
197 -- | SubConfig API & 1Click | --
198 --------------------------------
200 data PhyloSubConfigAPI =
201 PhyloSubConfigAPI { _sc_phyloProximity :: Double
202 , _sc_phyloSynchrony :: Double
203 , _sc_phyloQuality :: Double
204 , _sc_timeUnit :: TimeUnit
205 , _sc_clique :: Cluster
206 , _sc_exportFilter :: Double
207 , _sc_defaultMode :: Bool
208 } deriving (Show,Generic,Eq)
211 subConfigAPI2config :: PhyloSubConfigAPI -> PhyloConfig
212 subConfigAPI2config subConfig = defaultConfig
213 { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 2
214 , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
215 , phyloQuality = Quality (_sc_phyloQuality subConfig) 3
216 , timeUnit = _sc_timeUnit subConfig
217 , clique = _sc_clique subConfig
218 , defaultMode = _sc_defaultMode subConfig
219 , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
222 --------------------------
223 -- | SubConfig 1Click | --
224 --------------------------
226 defaultConfig :: PhyloConfig
228 PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
229 , listPath = "list.csv" -- useful for commandline only
230 , outputPath = "data/"
231 , corpusParser = Csv 150000
233 , phyloName = pack "Phylo Name"
235 , similarity = WeightedLogJaccard 0.5 2
236 , seaElevation = Constante 0.1 0.1
237 , defaultMode = False
238 , findAncestors = True
239 , phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups
240 , phyloQuality = Quality 0.5 3
241 , timeUnit = Year 3 1 5
243 , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
244 , exportSort = ByHierarchy Desc
245 , exportFilter = [ByBranchSize 3]
249 instance ToSchema PhyloConfig
250 instance ToSchema PhyloSubConfigAPI
252 instance FromJSON PhyloConfig
253 instance ToJSON PhyloConfig
255 instance FromJSON PhyloSubConfigAPI
256 instance ToJSON PhyloSubConfigAPI
258 instance FromJSON CorpusParser
259 instance ToJSON CorpusParser
261 instance FromJSON ListParser
262 instance ToJSON ListParser
264 instance FromJSON PhyloSimilarity
265 instance ToJSON PhyloSimilarity
267 instance FromJSON SeaElevation
268 instance ToJSON SeaElevation
270 instance FromJSON TimeUnit
271 instance ToJSON TimeUnit
273 instance FromJSON MaxCliqueFilter
274 instance ToJSON MaxCliqueFilter
276 instance FromJSON Cluster
277 instance ToJSON Cluster
279 instance FromJSON PhyloLabel
280 instance ToJSON PhyloLabel
282 instance FromJSON Tagger
283 instance ToJSON Tagger
285 instance FromJSON Sort
288 instance FromJSON Order
289 instance ToJSON Order
291 instance FromJSON Filter
292 instance ToJSON Filter
294 instance FromJSON SynchronyScope
295 instance ToJSON SynchronyScope
297 instance FromJSON SynchronyStrategy
298 instance ToJSON SynchronyStrategy
300 instance FromJSON Synchrony
301 instance ToJSON Synchrony
303 instance FromJSON Quality
304 instance ToJSON Quality
307 -- | Software parameters
309 Software { _software_name :: Text
310 , _software_version :: Text
311 } deriving (Generic, Show, Eq)
313 instance ToSchema Software where
314 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
318 defaultSoftware :: Software
320 Software { _software_name = pack "GarganText"
321 , _software_version = pack "v5" }
324 -- | Global parameters of a Phylo
326 PhyloParam { _phyloParam_version :: Text
327 , _phyloParam_software :: Software
328 , _phyloParam_config :: PhyloConfig
329 } deriving (Generic, Show, Eq)
331 instance ToSchema PhyloParam where
332 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
336 defaultPhyloParam :: PhyloParam
338 PhyloParam { _phyloParam_version = pack "v3"
339 , _phyloParam_software = defaultSoftware
340 , _phyloParam_config = defaultConfig }
347 -- | Date : a simple Integer
350 -- | DateStr : the string version of a Date
353 -- | Ngrams : a contiguous sequence of n terms
356 -- Document : a piece of Text linked to a Date
357 -- date = computational date; date' = original string date yyyy-mm-dd
358 -- Export Database to Document
359 data Document = Document
360 { date :: Date -- datatype Date {unDate :: Int}
361 , date' :: DateStr -- show date
363 , weight :: Maybe Double
365 , docTime :: TimeUnit
366 } deriving (Eq,Show,Generic,NFData)
374 -- | The Foundations of a Phylo created from a given TermList
375 data PhyloFoundations = PhyloFoundations
376 { _foundations_roots :: (Vector Ngrams)
377 , _foundations_rootsInGroups :: Map Int [PhyloGroupId] -- map of roots associated to groups
378 } deriving (Generic, Show, Eq)
380 data PhyloCounts = PhyloCounts
381 { coocByDate :: !(Map Date Cooc)
382 , docsByDate :: !(Map Date Double)
383 , rootsCountByDate :: !(Map Date (Map Int Double))
384 , rootsCount :: !(Map Int Double)
385 , rootsFreq :: !(Map Int Double)
386 , lastRootsFreq :: !(Map Int Double)
387 } deriving (Generic, Show, Eq)
389 data PhyloSources = PhyloSources
390 { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
392 instance ToSchema PhyloFoundations where
393 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
394 instance ToSchema PhyloCounts where
395 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
396 instance ToSchema PhyloSources where
397 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
399 ---------------------------
400 -- | Coocurency Matrix | --
401 ---------------------------
404 -- | Cooc : a coocurency matrix between two ngrams
405 type Cooc = Map (Int,Int) Double
412 -- | Period : a tuple of Dates
413 type Period = (Date,Date)
415 -- | PeriodStr : a tuple of DateStr
416 type PeriodStr = (DateStr,DateStr)
421 -- | Phylo datatype of a phylomemy
422 -- foundations : the foundations of the phylo
423 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
424 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
425 -- param : the parameters of the phylomemy (with the user's configuration)
426 -- periods : the temporal steps of a phylomemy
428 Phylo { _phylo_foundations :: PhyloFoundations
429 , _phylo_sources :: PhyloSources
430 , _phylo_counts :: PhyloCounts
431 , _phylo_seaLadder :: [Double]
432 , _phylo_param :: PhyloParam
433 , _phylo_periods :: Map Period PhyloPeriod
434 , _phylo_quality :: Double
435 , _phylo_level :: Double
437 deriving (Generic, Show, Eq)
439 instance ToSchema Phylo where
440 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
447 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
448 -- id: tuple (start date, end date) of the temporal step of the phylomemy
449 -- scales: scales of synchronic description
451 PhyloPeriod { _phylo_periodPeriod :: Period
452 , _phylo_periodPeriodStr :: PeriodStr
453 , _phylo_periodScales :: Map PhyloScaleId PhyloScale
454 } deriving (Generic, Show, Eq)
456 instance ToSchema PhyloPeriod where
457 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
463 -- | Scale : a scale of synchronic description
466 -- | PhyloScaleId : the id of a scale of synchronic description
467 type PhyloScaleId = (Period,Scale)
469 -- | PhyloScale : sub-structure of the phylomemy in scale of synchronic description
471 PhyloScale { _phylo_scalePeriod :: Period
472 , _phylo_scalePeriodStr :: PeriodStr
473 , _phylo_scaleScale :: Scale
474 , _phylo_scaleGroups :: Map PhyloGroupId PhyloGroup
476 deriving (Generic, Show, Eq)
478 instance ToSchema PhyloScale where
479 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
482 type PhyloGroupId = (PhyloScaleId, Int)
484 -- | BranchId : (a scale, a sequence of branch index)
485 -- the sequence is a path of heritage from the most to the less specific branch
486 type PhyloBranchId = (Scale, [Int])
488 -- | PhyloGroup : group of ngrams at each scale and period
490 PhyloGroup { _phylo_groupPeriod :: Period
491 , _phylo_groupPeriod' :: (Text,Text)
492 , _phylo_groupScale :: Scale
493 , _phylo_groupIndex :: Int
494 , _phylo_groupLabel :: Text
495 , _phylo_groupSupport :: Support
496 , _phylo_groupWeight :: Maybe Double
497 , _phylo_groupSources :: [Int]
498 , _phylo_groupNgrams :: [Int]
499 , _phylo_groupCooc :: !(Cooc)
500 , _phylo_groupDensity :: Double
501 , _phylo_groupBranchId :: PhyloBranchId
502 , _phylo_groupMeta :: Map Text [Double]
503 , _phylo_groupRootsCount :: Map Int Double
504 , _phylo_groupScaleParents :: [Pointer]
505 , _phylo_groupScaleChilds :: [Pointer]
506 , _phylo_groupPeriodParents :: [Pointer]
507 , _phylo_groupPeriodChilds :: [Pointer]
508 , _phylo_groupAncestors :: [Pointer]
509 , _phylo_groupPeriodMemoryParents :: [Pointer']
510 , _phylo_groupPeriodMemoryChilds :: [Pointer']
512 deriving (Generic, Show, Eq, NFData, Ord)
514 instance ToSchema PhyloGroup where
515 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
518 -- | Weight : A generic mesure that can be associated with an Id
522 -- | Pointer : A weighted pointer to a given PhyloGroup
523 type Pointer = (PhyloGroupId, Weight)
524 -- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
525 type Pointer' = (PhyloGroupId, (Thr,Weight))
527 data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
528 data PointerType = TemporalPointer | ScalePointer deriving (Generic, Show)
531 --------------------------
532 -- | Phylo Clustering | --
533 --------------------------
535 -- | Support : Number of Documents where a Cluster occurs
538 data Clustering = Clustering
539 { _clustering_roots :: [Int]
540 , _clustering_support :: Support
541 , _clustering_period :: Period
542 -- additional materials for visualization
543 , _clustering_visWeighting :: Maybe Double
544 , _clustering_visFiltering :: [Int]
545 } deriving (Generic,NFData,Show,Eq)
551 type DotId = TextLazy.Text
553 data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
555 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
556 instance ToSchema Filter where
557 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
560 data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
562 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
563 instance ToSchema Sort where
564 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
567 data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
568 instance ToSchema Tagger where
569 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
574 { _branch_labelTagger :: Tagger
575 , _branch_labelSize :: Int }
577 { _group_labelTagger :: Tagger
578 , _group_labelSize :: Int }
579 deriving (Show,Generic,Eq)
581 instance ToSchema PhyloLabel where
582 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
587 { _branch_id :: PhyloBranchId
588 , _branch_canonId :: [Int]
589 , _branch_seaLevel :: [Double]
590 , _branch_x :: Double
591 , _branch_y :: Double
592 , _branch_w :: Double
593 , _branch_t :: Double
594 , _branch_label :: Text
595 , _branch_meta :: Map Text [Double]
596 } deriving (Generic, Show, Eq)
598 instance ToSchema PhyloBranch where
599 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
603 { _export_groups :: [PhyloGroup]
604 , _export_branches :: [PhyloBranch]
605 } deriving (Generic, Show)
606 instance ToSchema PhyloExport where
607 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
614 makeLenses ''PhyloConfig
615 makeLenses ''PhyloSubConfigAPI
616 makeLenses ''PhyloSimilarity
617 makeLenses ''SeaElevation
620 makeLenses ''PhyloLabel
621 makeLenses ''TimeUnit
622 makeLenses ''PhyloFoundations
623 makeLenses ''Clustering
625 makeLenses ''PhyloPeriod
626 makeLenses ''PhyloScale
627 makeLenses ''PhyloGroup
628 makeLenses ''PhyloParam
629 makeLenses ''PhyloExport
630 makeLenses ''PhyloBranch
632 ------------------------
633 -- | JSON instances | --
634 ------------------------
636 instance FromJSON Phylo
637 instance ToJSON Phylo
639 instance FromJSON PhyloSources
640 instance ToJSON PhyloSources
642 instance FromJSON PhyloParam
643 instance ToJSON PhyloParam
645 instance FromJSON PhyloCounts
646 instance ToJSON PhyloCounts
648 instance FromJSON PhyloPeriod
649 instance ToJSON PhyloPeriod
651 instance FromJSON PhyloScale
652 instance ToJSON PhyloScale
654 instance FromJSON Software
655 instance ToJSON Software
657 instance FromJSON PhyloGroup
658 instance ToJSON PhyloGroup
660 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)