2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
72 { _bpt_threshold :: Double
73 , _bpt_sensibility :: Double}
74 | ByProximityDistribution
75 { _bpd_sensibility :: Double}
76 deriving (Show,Generic,Eq)
83 , _year_matchingFrame :: Int }
84 deriving (Show,Generic,Eq)
91 deriving (Show,Generic,Eq)
95 Config { corpusPath :: FilePath
96 , listPath :: FilePath
97 , outputPath :: FilePath
98 , corpusParser :: CorpusParser
101 , phyloProximity :: Proximity
102 , phyloSynchrony :: Synchrony
103 , timeUnit :: TimeUnit
104 , contextualUnit :: ContextualUnit
105 , exportLabel :: [PhyloLabel]
107 , exportFilter :: [Filter]
108 } deriving (Show,Generic,Eq)
111 defaultConfig :: Config
113 Config { corpusPath = ""
116 , corpusParser = Csv 1000
117 , phyloName = pack "Default Phylo"
119 , phyloProximity = WeightedLogJaccard 10 0 0.1
120 , phyloSynchrony = ByProximityDistribution 0
121 , timeUnit = Year 3 1 5
122 , contextualUnit = Fis 2 4
123 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
124 , exportSort = ByHierarchy
125 , exportFilter = [ByBranchSize 2]
128 instance FromJSON Config
129 instance ToJSON Config
130 instance FromJSON CorpusParser
131 instance ToJSON CorpusParser
132 instance FromJSON Proximity
133 instance ToJSON Proximity
134 instance FromJSON TimeUnit
135 instance ToJSON TimeUnit
136 instance FromJSON ContextualUnit
137 instance ToJSON ContextualUnit
138 instance FromJSON PhyloLabel
139 instance ToJSON PhyloLabel
140 instance FromJSON Tagger
141 instance ToJSON Tagger
142 instance FromJSON Sort
144 instance FromJSON Order
145 instance ToJSON Order
146 instance FromJSON Filter
147 instance ToJSON Filter
148 instance FromJSON Synchrony
149 instance ToJSON Synchrony
152 -- | Software parameters
154 Software { _software_name :: Text
155 , _software_version :: Text
156 } deriving (Generic, Show, Eq)
158 defaultSoftware :: Software
160 Software { _software_name = pack "Gargantext"
161 , _software_version = pack "v4" }
164 -- | Global parameters of a Phylo
166 PhyloParam { _phyloParam_version :: Text
167 , _phyloParam_software :: Software
168 , _phyloParam_config :: Config
169 } deriving (Generic, Show, Eq)
171 defaultPhyloParam :: PhyloParam
173 PhyloParam { _phyloParam_version = pack "v2.adaptative"
174 , _phyloParam_software = defaultSoftware
175 , _phyloParam_config = defaultConfig }
183 -- | Date : a simple Integer
186 -- | Ngrams : a contiguous sequence of n terms
189 -- | Document : a piece of Text linked to a Date
190 data Document = Document
193 } deriving (Eq,Show,Generic,NFData)
201 -- | The Foundations of a Phylo created from a given TermList
202 data PhyloFoundations = PhyloFoundations
203 { _foundations_roots :: !(Vector Ngrams)
204 , _foundations_mapList :: TermList
205 } deriving (Generic, Show, Eq)
208 ---------------------------
209 -- | Coocurency Matrix | --
210 ---------------------------
213 -- | Cooc : a coocurency matrix between two ngrams
214 type Cooc = Map (Int,Int) Double
222 -- | Phylo datatype of a phylomemy
223 -- foundations : the foundations of the phylo
224 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
225 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
226 -- param : the parameters of the phylomemy (with the user's configuration)
227 -- periods : the temporal steps of a phylomemy
229 Phylo { _phylo_foundations :: PhyloFoundations
230 , _phylo_timeCooc :: !(Map Date Cooc)
231 , _phylo_timeDocs :: !(Map Date Double)
232 , _phylo_param :: PhyloParam
233 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
235 deriving (Generic, Show, Eq)
238 -- | PhyloPeriodId : the id of a given period
239 type PhyloPeriodId = (Date,Date)
241 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
242 -- id: tuple (start date, end date) of the temporal step of the phylomemy
243 -- levels: levels of granularity
245 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
246 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
247 } deriving (Generic, Show, Eq)
250 -- | Level : a level of clustering
253 -- | PhyloLevelId : the id of a level of clustering in a given period
254 type PhyloLevelId = (PhyloPeriodId,Level)
256 -- | PhyloLevel : levels of phylomemy on a synchronic axis
257 -- Levels description:
258 -- Level 0: The foundations and the base of the phylo
259 -- Level 1: First level of clustering (the Fis)
260 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
262 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
263 , _phylo_levelLevel :: Level
264 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
266 deriving (Generic, Show, Eq)
269 type PhyloGroupId = (PhyloLevelId, Int)
271 -- | BranchId : (a level, a sequence of branch index)
272 -- the sequence is a path of heritage from the most to the less specific branch
273 type PhyloBranchId = (Level, [Int])
275 -- | PhyloGroup : group of ngrams at each level and period
277 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
278 , _phylo_groupLevel :: Level
279 , _phylo_groupIndex :: Int
280 , _phylo_groupLabel :: Text
281 , _phylo_groupSupport :: Support
282 , _phylo_groupNgrams :: [Int]
283 , _phylo_groupCooc :: !(Cooc)
284 , _phylo_groupBranchId :: PhyloBranchId
285 , _phylo_groupMeta :: Map Text [Double]
286 , _phylo_groupLevelParents :: [Pointer]
287 , _phylo_groupLevelChilds :: [Pointer]
288 , _phylo_groupPeriodParents :: [Pointer]
289 , _phylo_groupPeriodChilds :: [Pointer]
291 deriving (Generic, Show, Eq, NFData)
293 -- | Weight : A generic mesure that can be associated with an Id
296 -- | Pointer : A weighted pointer to a given PhyloGroup
297 type Pointer = (PhyloGroupId, Weight)
299 data Filiation = ToParents | ToChilds deriving (Generic, Show)
300 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
303 ---------------------------
304 -- | Frequent Item Set | --
305 ---------------------------
307 -- | Clique : Set of ngrams cooccurring in the same Document
308 type Clique = Set Ngrams
310 -- | Support : Number of Documents where a Clique occurs
313 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
314 data PhyloFis = PhyloFis
315 { _phyloFis_clique :: Clique
316 , _phyloFis_support :: Support
317 , _phyloFis_period :: (Date,Date)
318 } deriving (Generic,NFData,Show,Eq)
325 type DotId = TextLazy.Text
327 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
329 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
331 data Order = Asc | Desc deriving (Show,Generic,Eq)
333 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
335 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
339 { _branch_labelTagger :: Tagger
340 , _branch_labelSize :: Int }
342 { _group_labelTagger :: Tagger
343 , _group_labelSize :: Int }
344 deriving (Show,Generic,Eq)
348 { _branch_id :: PhyloBranchId
349 , _branch_label :: Text
350 , _branch_meta :: Map Text [Double]
351 } deriving (Generic, Show)
355 { _export_groups :: [PhyloGroup]
356 , _export_branches :: [PhyloBranch]
357 } deriving (Generic, Show)
364 makeLenses ''Proximity
365 makeLenses ''ContextualUnit
366 makeLenses ''PhyloLabel
367 makeLenses ''TimeUnit
368 makeLenses ''PhyloFoundations
369 makeLenses ''PhyloFis
371 makeLenses ''PhyloPeriod
372 makeLenses ''PhyloLevel
373 makeLenses ''PhyloGroup
374 makeLenses ''PhyloParam
375 makeLenses ''PhyloExport
376 makeLenses ''PhyloBranch
378 ------------------------
379 -- | JSON instances | --
380 ------------------------
383 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)