2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
70 data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
72 data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
76 { _bpt_threshold :: Double
77 , _bpt_sensibility :: Double
78 , _bpt_scope :: SynchronyScope
79 , _bpt_strategy :: SynchronyStrategy }
80 | ByProximityDistribution
81 { _bpd_sensibility :: Double
82 , _bpd_strategy :: SynchronyStrategy }
83 deriving (Show,Generic,Eq)
90 , _year_matchingFrame :: Int }
91 deriving (Show,Generic,Eq)
100 deriving (Show,Generic,Eq)
104 Quality { _qua_granularity :: Double
105 , _qua_minBranch :: Int }
106 deriving (Show,Generic,Eq)
110 Config { corpusPath :: FilePath
111 , listPath :: FilePath
112 , outputPath :: FilePath
113 , corpusParser :: CorpusParser
116 , phyloProximity :: Proximity
117 , phyloSynchrony :: Synchrony
118 , phyloQuality :: Quality
119 , timeUnit :: TimeUnit
121 , exportLabel :: [PhyloLabel]
123 , exportFilter :: [Filter]
124 } deriving (Show,Generic,Eq)
127 defaultConfig :: Config
129 Config { corpusPath = ""
132 , corpusParser = Csv 1000
133 , phyloName = pack "Default Phylo"
135 , phyloProximity = WeightedLogJaccard 10 0 0.1
136 , phyloSynchrony = ByProximityThreshold 0.1 10 AllBranches MergeAllGroups
137 , phyloQuality = Quality 0.1 1
138 , timeUnit = Year 3 1 5
140 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
141 , exportSort = ByHierarchy
142 , exportFilter = [ByBranchSize 2]
145 instance FromJSON Config
146 instance ToJSON Config
147 instance FromJSON CorpusParser
148 instance ToJSON CorpusParser
149 instance FromJSON Proximity
150 instance ToJSON Proximity
151 instance FromJSON TimeUnit
152 instance ToJSON TimeUnit
153 instance FromJSON Clique
154 instance ToJSON Clique
155 instance FromJSON PhyloLabel
156 instance ToJSON PhyloLabel
157 instance FromJSON Tagger
158 instance ToJSON Tagger
159 instance FromJSON Sort
161 instance FromJSON Order
162 instance ToJSON Order
163 instance FromJSON Filter
164 instance ToJSON Filter
165 instance FromJSON SynchronyScope
166 instance ToJSON SynchronyScope
167 instance FromJSON SynchronyStrategy
168 instance ToJSON SynchronyStrategy
169 instance FromJSON Synchrony
170 instance ToJSON Synchrony
171 instance FromJSON Quality
172 instance ToJSON Quality
175 -- | Software parameters
177 Software { _software_name :: Text
178 , _software_version :: Text
179 } deriving (Generic, Show, Eq)
181 defaultSoftware :: Software
183 Software { _software_name = pack "Gargantext"
184 , _software_version = pack "v4" }
187 -- | Global parameters of a Phylo
189 PhyloParam { _phyloParam_version :: Text
190 , _phyloParam_software :: Software
191 , _phyloParam_config :: Config
192 } deriving (Generic, Show, Eq)
194 defaultPhyloParam :: PhyloParam
196 PhyloParam { _phyloParam_version = pack "v2.adaptative"
197 , _phyloParam_software = defaultSoftware
198 , _phyloParam_config = defaultConfig }
206 -- | Date : a simple Integer
209 -- | Ngrams : a contiguous sequence of n terms
212 -- | Document : a piece of Text linked to a Date
213 data Document = Document
216 } deriving (Eq,Show,Generic,NFData)
224 -- | The Foundations of a Phylo created from a given TermList
225 data PhyloFoundations = PhyloFoundations
226 { _foundations_roots :: !(Vector Ngrams)
227 , _foundations_mapList :: TermList
228 } deriving (Generic, Show, Eq)
231 ---------------------------
232 -- | Coocurency Matrix | --
233 ---------------------------
236 -- | Cooc : a coocurency matrix between two ngrams
237 type Cooc = Map (Int,Int) Double
245 -- | Phylo datatype of a phylomemy
246 -- foundations : the foundations of the phylo
247 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
248 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
249 -- param : the parameters of the phylomemy (with the user's configuration)
250 -- periods : the temporal steps of a phylomemy
252 Phylo { _phylo_foundations :: PhyloFoundations
253 , _phylo_timeCooc :: !(Map Date Cooc)
254 , _phylo_timeDocs :: !(Map Date Double)
255 , _phylo_termFreq :: !(Map Int Double)
256 , _phylo_param :: PhyloParam
257 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
259 deriving (Generic, Show, Eq)
262 -- | PhyloPeriodId : the id of a given period
263 type PhyloPeriodId = (Date,Date)
265 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
266 -- id: tuple (start date, end date) of the temporal step of the phylomemy
267 -- levels: levels of granularity
269 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
270 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
271 } deriving (Generic, Show, Eq)
274 -- | Level : a level of clustering
277 -- | PhyloLevelId : the id of a level of clustering in a given period
278 type PhyloLevelId = (PhyloPeriodId,Level)
280 -- | PhyloLevel : levels of phylomemy on a synchronic axis
281 -- Levels description:
282 -- Level 0: The foundations and the base of the phylo
283 -- Level 1: First level of clustering (the Fis)
284 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
286 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
287 , _phylo_levelLevel :: Level
288 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
290 deriving (Generic, Show, Eq)
293 type PhyloGroupId = (PhyloLevelId, Int)
295 -- | BranchId : (a level, a sequence of branch index)
296 -- the sequence is a path of heritage from the most to the less specific branch
297 type PhyloBranchId = (Level, [Int])
299 -- | PhyloGroup : group of ngrams at each level and period
301 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
302 , _phylo_groupLevel :: Level
303 , _phylo_groupIndex :: Int
304 , _phylo_groupLabel :: Text
305 , _phylo_groupSupport :: Support
306 , _phylo_groupNgrams :: [Int]
307 , _phylo_groupCooc :: !(Cooc)
308 , _phylo_groupBranchId :: PhyloBranchId
309 , _phylo_groupMeta :: Map Text [Double]
310 , _phylo_groupLevelParents :: [Pointer]
311 , _phylo_groupLevelChilds :: [Pointer]
312 , _phylo_groupPeriodParents :: [Pointer]
313 , _phylo_groupPeriodChilds :: [Pointer]
315 deriving (Generic, Show, Eq, NFData)
317 -- | Weight : A generic mesure that can be associated with an Id
320 -- | Pointer : A weighted pointer to a given PhyloGroup
321 type Pointer = (PhyloGroupId, Weight)
323 data Filiation = ToParents | ToChilds deriving (Generic, Show)
324 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
327 ----------------------
328 -- | Phylo Clique | --
329 ----------------------
331 -- | Support : Number of Documents where a Clique occurs
334 data PhyloClique = PhyloClique
335 { _phyloClique_nodes :: Set Ngrams
336 , _phyloClique_support :: Support
337 , _phyloClique_period :: (Date,Date)
338 } deriving (Generic,NFData,Show,Eq)
345 type DotId = TextLazy.Text
347 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
349 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
351 data Order = Asc | Desc deriving (Show,Generic,Eq)
353 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
355 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
359 { _branch_labelTagger :: Tagger
360 , _branch_labelSize :: Int }
362 { _group_labelTagger :: Tagger
363 , _group_labelSize :: Int }
364 deriving (Show,Generic,Eq)
368 { _branch_id :: PhyloBranchId
369 , _branch_label :: Text
370 , _branch_meta :: Map Text [Double]
371 } deriving (Generic, Show)
375 { _export_groups :: [PhyloGroup]
376 , _export_branches :: [PhyloBranch]
377 } deriving (Generic, Show)
384 makeLenses ''Proximity
387 makeLenses ''PhyloLabel
388 makeLenses ''TimeUnit
389 makeLenses ''PhyloFoundations
390 makeLenses ''PhyloClique
392 makeLenses ''PhyloPeriod
393 makeLenses ''PhyloLevel
394 makeLenses ''PhyloGroup
395 makeLenses ''PhyloParam
396 makeLenses ''PhyloExport
397 makeLenses ''PhyloBranch
399 ------------------------
400 -- | JSON instances | --
401 ------------------------
404 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)