2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
72 { _bpt_threshold :: Double }
73 | ByProximityDistribution
74 deriving (Show,Generic,Eq)
81 , _year_matchingFrame :: Int }
82 deriving (Show,Generic,Eq)
89 deriving (Show,Generic,Eq)
93 Config { corpusPath :: FilePath
94 , listPath :: FilePath
95 , outputPath :: FilePath
96 , corpusParser :: CorpusParser
99 , phyloProximity :: Proximity
100 , phyloSynchrony :: Synchrony
101 , timeUnit :: TimeUnit
102 , contextualUnit :: ContextualUnit
103 , exportLabel :: [PhyloLabel]
105 , exportFilter :: [Filter]
106 } deriving (Show,Generic,Eq)
109 defaultConfig :: Config
111 Config { corpusPath = ""
114 , corpusParser = Csv 1000
115 , phyloName = pack "Default Phylo"
117 , phyloProximity = WeightedLogJaccard 10 0 0.1
118 , phyloSynchrony = ByProximityThreshold 0.1
119 , timeUnit = Year 3 1 5
120 , contextualUnit = Fis 2 4
121 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
122 , exportSort = ByHierarchy
123 , exportFilter = [ByBranchSize 2]
126 instance FromJSON Config
127 instance ToJSON Config
128 instance FromJSON CorpusParser
129 instance ToJSON CorpusParser
130 instance FromJSON Proximity
131 instance ToJSON Proximity
132 instance FromJSON TimeUnit
133 instance ToJSON TimeUnit
134 instance FromJSON ContextualUnit
135 instance ToJSON ContextualUnit
136 instance FromJSON PhyloLabel
137 instance ToJSON PhyloLabel
138 instance FromJSON Tagger
139 instance ToJSON Tagger
140 instance FromJSON Sort
142 instance FromJSON Order
143 instance ToJSON Order
144 instance FromJSON Filter
145 instance ToJSON Filter
146 instance FromJSON Synchrony
147 instance ToJSON Synchrony
150 -- | Software parameters
152 Software { _software_name :: Text
153 , _software_version :: Text
154 } deriving (Generic, Show, Eq)
156 defaultSoftware :: Software
158 Software { _software_name = pack "Gargantext"
159 , _software_version = pack "v4" }
162 -- | Global parameters of a Phylo
164 PhyloParam { _phyloParam_version :: Text
165 , _phyloParam_software :: Software
166 , _phyloParam_config :: Config
167 } deriving (Generic, Show, Eq)
169 defaultPhyloParam :: PhyloParam
171 PhyloParam { _phyloParam_version = pack "v2.adaptative"
172 , _phyloParam_software = defaultSoftware
173 , _phyloParam_config = defaultConfig }
181 -- | Date : a simple Integer
184 -- | Ngrams : a contiguous sequence of n terms
187 -- | Document : a piece of Text linked to a Date
188 data Document = Document
191 } deriving (Eq,Show,Generic,NFData)
199 -- | The Foundations of a Phylo created from a given TermList
200 data PhyloFoundations = PhyloFoundations
201 { _foundations_roots :: !(Vector Ngrams)
202 , _foundations_mapList :: TermList
203 } deriving (Generic, Show, Eq)
206 ---------------------------
207 -- | Coocurency Matrix | --
208 ---------------------------
211 -- | Cooc : a coocurency matrix between two ngrams
212 type Cooc = Map (Int,Int) Double
220 -- | Phylo datatype of a phylomemy
221 -- foundations : the foundations of the phylo
222 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
223 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
224 -- param : the parameters of the phylomemy (with the user's configuration)
225 -- periods : the temporal steps of a phylomemy
227 Phylo { _phylo_foundations :: PhyloFoundations
228 , _phylo_timeCooc :: !(Map Date Cooc)
229 , _phylo_timeDocs :: !(Map Date Double)
230 , _phylo_param :: PhyloParam
231 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
233 deriving (Generic, Show, Eq)
236 -- | PhyloPeriodId : the id of a given period
237 type PhyloPeriodId = (Date,Date)
239 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
240 -- id: tuple (start date, end date) of the temporal step of the phylomemy
241 -- levels: levels of granularity
243 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
244 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
245 } deriving (Generic, Show, Eq)
248 -- | Level : a level of clustering
251 -- | PhyloLevelId : the id of a level of clustering in a given period
252 type PhyloLevelId = (PhyloPeriodId,Level)
254 -- | PhyloLevel : levels of phylomemy on a synchronic axis
255 -- Levels description:
256 -- Level 0: The foundations and the base of the phylo
257 -- Level 1: First level of clustering (the Fis)
258 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
260 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
261 , _phylo_levelLevel :: Level
262 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
264 deriving (Generic, Show, Eq)
267 type PhyloGroupId = (PhyloLevelId, Int)
269 -- | BranchId : (a level, a sequence of branch index)
270 -- the sequence is a path of heritage from the most to the less specific branch
271 type PhyloBranchId = (Level, [Int])
273 -- | PhyloGroup : group of ngrams at each level and period
275 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
276 , _phylo_groupLevel :: Level
277 , _phylo_groupIndex :: Int
278 , _phylo_groupLabel :: Text
279 , _phylo_groupSupport :: Support
280 , _phylo_groupNgrams :: [Int]
281 , _phylo_groupCooc :: !(Cooc)
282 , _phylo_groupBranchId :: PhyloBranchId
283 , _phylo_groupMeta :: Map Text [Double]
284 , _phylo_groupLevelParents :: [Pointer]
285 , _phylo_groupLevelChilds :: [Pointer]
286 , _phylo_groupPeriodParents :: [Pointer]
287 , _phylo_groupPeriodChilds :: [Pointer]
289 deriving (Generic, Show, Eq)
291 -- | Weight : A generic mesure that can be associated with an Id
294 -- | Pointer : A weighted pointer to a given PhyloGroup
295 type Pointer = (PhyloGroupId, Weight)
297 data Filiation = ToParents | ToChilds deriving (Generic, Show)
298 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
301 ---------------------------
302 -- | Frequent Item Set | --
303 ---------------------------
305 -- | Clique : Set of ngrams cooccurring in the same Document
306 type Clique = Set Ngrams
308 -- | Support : Number of Documents where a Clique occurs
311 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
312 data PhyloFis = PhyloFis
313 { _phyloFis_clique :: Clique
314 , _phyloFis_support :: Support
315 , _phyloFis_period :: (Date,Date)
316 } deriving (Generic,NFData,Show,Eq)
323 type DotId = TextLazy.Text
325 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
327 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
329 data Order = Asc | Desc deriving (Show,Generic,Eq)
331 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
333 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
337 { _branch_labelTagger :: Tagger
338 , _branch_labelSize :: Int }
340 { _group_labelTagger :: Tagger
341 , _group_labelSize :: Int }
342 deriving (Show,Generic,Eq)
346 { _branch_id :: PhyloBranchId
347 , _branch_label :: Text
348 , _branch_meta :: Map Text [Double]
349 } deriving (Generic, Show)
353 { _export_groups :: [PhyloGroup]
354 , _export_branches :: [PhyloBranch]
355 } deriving (Generic, Show)
362 makeLenses ''Proximity
363 makeLenses ''ContextualUnit
364 makeLenses ''PhyloLabel
365 makeLenses ''TimeUnit
366 makeLenses ''PhyloFoundations
367 makeLenses ''PhyloFis
369 makeLenses ''PhyloPeriod
370 makeLenses ''PhyloLevel
371 makeLenses ''PhyloGroup
372 makeLenses ''PhyloParam
373 makeLenses ''PhyloExport
374 makeLenses ''PhyloBranch
376 ------------------------
377 -- | JSON instances | --
378 ------------------------
381 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)