2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
72 { _bpt_threshold :: Double
73 , _bpt_sensibility :: Double}
74 | ByProximityDistribution
75 deriving (Show,Generic,Eq)
82 , _year_matchingFrame :: Int }
83 deriving (Show,Generic,Eq)
90 deriving (Show,Generic,Eq)
94 Config { corpusPath :: FilePath
95 , listPath :: FilePath
96 , outputPath :: FilePath
97 , corpusParser :: CorpusParser
100 , phyloProximity :: Proximity
101 , phyloSynchrony :: Synchrony
102 , timeUnit :: TimeUnit
103 , contextualUnit :: ContextualUnit
104 , exportLabel :: [PhyloLabel]
106 , exportFilter :: [Filter]
107 } deriving (Show,Generic,Eq)
110 defaultConfig :: Config
112 Config { corpusPath = ""
115 , corpusParser = Csv 1000
116 , phyloName = pack "Default Phylo"
118 , phyloProximity = WeightedLogJaccard 10 0 0.1
119 , phyloSynchrony = ByProximityThreshold 0.4 0
120 , timeUnit = Year 3 1 5
121 , contextualUnit = Fis 2 4
122 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
123 , exportSort = ByHierarchy
124 , exportFilter = [ByBranchSize 2]
127 instance FromJSON Config
128 instance ToJSON Config
129 instance FromJSON CorpusParser
130 instance ToJSON CorpusParser
131 instance FromJSON Proximity
132 instance ToJSON Proximity
133 instance FromJSON TimeUnit
134 instance ToJSON TimeUnit
135 instance FromJSON ContextualUnit
136 instance ToJSON ContextualUnit
137 instance FromJSON PhyloLabel
138 instance ToJSON PhyloLabel
139 instance FromJSON Tagger
140 instance ToJSON Tagger
141 instance FromJSON Sort
143 instance FromJSON Order
144 instance ToJSON Order
145 instance FromJSON Filter
146 instance ToJSON Filter
147 instance FromJSON Synchrony
148 instance ToJSON Synchrony
151 -- | Software parameters
153 Software { _software_name :: Text
154 , _software_version :: Text
155 } deriving (Generic, Show, Eq)
157 defaultSoftware :: Software
159 Software { _software_name = pack "Gargantext"
160 , _software_version = pack "v4" }
163 -- | Global parameters of a Phylo
165 PhyloParam { _phyloParam_version :: Text
166 , _phyloParam_software :: Software
167 , _phyloParam_config :: Config
168 } deriving (Generic, Show, Eq)
170 defaultPhyloParam :: PhyloParam
172 PhyloParam { _phyloParam_version = pack "v2.adaptative"
173 , _phyloParam_software = defaultSoftware
174 , _phyloParam_config = defaultConfig }
182 -- | Date : a simple Integer
185 -- | Ngrams : a contiguous sequence of n terms
188 -- | Document : a piece of Text linked to a Date
189 data Document = Document
192 } deriving (Eq,Show,Generic,NFData)
200 -- | The Foundations of a Phylo created from a given TermList
201 data PhyloFoundations = PhyloFoundations
202 { _foundations_roots :: !(Vector Ngrams)
203 , _foundations_mapList :: TermList
204 } deriving (Generic, Show, Eq)
207 ---------------------------
208 -- | Coocurency Matrix | --
209 ---------------------------
212 -- | Cooc : a coocurency matrix between two ngrams
213 type Cooc = Map (Int,Int) Double
221 -- | Phylo datatype of a phylomemy
222 -- foundations : the foundations of the phylo
223 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
224 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
225 -- param : the parameters of the phylomemy (with the user's configuration)
226 -- periods : the temporal steps of a phylomemy
228 Phylo { _phylo_foundations :: PhyloFoundations
229 , _phylo_timeCooc :: !(Map Date Cooc)
230 , _phylo_timeDocs :: !(Map Date Double)
231 , _phylo_param :: PhyloParam
232 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
234 deriving (Generic, Show, Eq)
237 -- | PhyloPeriodId : the id of a given period
238 type PhyloPeriodId = (Date,Date)
240 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
241 -- id: tuple (start date, end date) of the temporal step of the phylomemy
242 -- levels: levels of granularity
244 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
245 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
246 } deriving (Generic, Show, Eq)
249 -- | Level : a level of clustering
252 -- | PhyloLevelId : the id of a level of clustering in a given period
253 type PhyloLevelId = (PhyloPeriodId,Level)
255 -- | PhyloLevel : levels of phylomemy on a synchronic axis
256 -- Levels description:
257 -- Level 0: The foundations and the base of the phylo
258 -- Level 1: First level of clustering (the Fis)
259 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
261 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
262 , _phylo_levelLevel :: Level
263 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
265 deriving (Generic, Show, Eq)
268 type PhyloGroupId = (PhyloLevelId, Int)
270 -- | BranchId : (a level, a sequence of branch index)
271 -- the sequence is a path of heritage from the most to the less specific branch
272 type PhyloBranchId = (Level, [Int])
274 -- | PhyloGroup : group of ngrams at each level and period
276 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
277 , _phylo_groupLevel :: Level
278 , _phylo_groupIndex :: Int
279 , _phylo_groupLabel :: Text
280 , _phylo_groupSupport :: Support
281 , _phylo_groupNgrams :: [Int]
282 , _phylo_groupCooc :: !(Cooc)
283 , _phylo_groupBranchId :: PhyloBranchId
284 , _phylo_groupMeta :: Map Text [Double]
285 , _phylo_groupLevelParents :: [Pointer]
286 , _phylo_groupLevelChilds :: [Pointer]
287 , _phylo_groupPeriodParents :: [Pointer]
288 , _phylo_groupPeriodChilds :: [Pointer]
290 deriving (Generic, Show, Eq, NFData)
292 -- | Weight : A generic mesure that can be associated with an Id
295 -- | Pointer : A weighted pointer to a given PhyloGroup
296 type Pointer = (PhyloGroupId, Weight)
298 data Filiation = ToParents | ToChilds deriving (Generic, Show)
299 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
302 ---------------------------
303 -- | Frequent Item Set | --
304 ---------------------------
306 -- | Clique : Set of ngrams cooccurring in the same Document
307 type Clique = Set Ngrams
309 -- | Support : Number of Documents where a Clique occurs
312 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
313 data PhyloFis = PhyloFis
314 { _phyloFis_clique :: Clique
315 , _phyloFis_support :: Support
316 , _phyloFis_period :: (Date,Date)
317 } deriving (Generic,NFData,Show,Eq)
324 type DotId = TextLazy.Text
326 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
328 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
330 data Order = Asc | Desc deriving (Show,Generic,Eq)
332 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
334 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
338 { _branch_labelTagger :: Tagger
339 , _branch_labelSize :: Int }
341 { _group_labelTagger :: Tagger
342 , _group_labelSize :: Int }
343 deriving (Show,Generic,Eq)
347 { _branch_id :: PhyloBranchId
348 , _branch_label :: Text
349 , _branch_meta :: Map Text [Double]
350 } deriving (Generic, Show)
354 { _export_groups :: [PhyloGroup]
355 , _export_branches :: [PhyloBranch]
356 } deriving (Generic, Show)
363 makeLenses ''Proximity
364 makeLenses ''ContextualUnit
365 makeLenses ''PhyloLabel
366 makeLenses ''TimeUnit
367 makeLenses ''PhyloFoundations
368 makeLenses ''PhyloFis
370 makeLenses ''PhyloPeriod
371 makeLenses ''PhyloLevel
372 makeLenses ''PhyloGroup
373 makeLenses ''PhyloParam
374 makeLenses ''PhyloExport
375 makeLenses ''PhyloBranch
377 ------------------------
378 -- | JSON instances | --
379 ------------------------
382 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)