2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
74 , _year_matchingFrame :: Int }
75 deriving (Show,Generic,Eq)
82 deriving (Show,Generic,Eq)
86 Config { corpusPath :: FilePath
87 , listPath :: FilePath
88 , outputPath :: FilePath
89 , corpusParser :: CorpusParser
92 , phyloProximity :: Proximity
93 , timeUnit :: TimeUnit
94 , contextualUnit :: ContextualUnit
95 , exportLabel :: [PhyloLabel]
97 , exportFilter :: [Filter]
98 } deriving (Show,Generic,Eq)
101 defaultConfig :: Config
103 Config { corpusPath = ""
106 , corpusParser = Csv 1000
107 , phyloName = pack "Default Phylo"
109 , phyloProximity = WeightedLogJaccard 10 0 0.1
110 , timeUnit = Year 3 1 5
111 , contextualUnit = Fis 2 4
112 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
113 , exportSort = ByHierarchy
114 , exportFilter = [ByBranchSize 2]
117 instance FromJSON Config
118 instance ToJSON Config
119 instance FromJSON CorpusParser
120 instance ToJSON CorpusParser
121 instance FromJSON Proximity
122 instance ToJSON Proximity
123 instance FromJSON TimeUnit
124 instance ToJSON TimeUnit
125 instance FromJSON ContextualUnit
126 instance ToJSON ContextualUnit
127 instance FromJSON PhyloLabel
128 instance ToJSON PhyloLabel
129 instance FromJSON Tagger
130 instance ToJSON Tagger
131 instance FromJSON Sort
133 instance FromJSON Order
134 instance ToJSON Order
135 instance FromJSON Filter
136 instance ToJSON Filter
139 -- | Software parameters
141 Software { _software_name :: Text
142 , _software_version :: Text
143 } deriving (Generic, Show, Eq)
145 defaultSoftware :: Software
147 Software { _software_name = pack "Gargantext"
148 , _software_version = pack "v4" }
151 -- | Global parameters of a Phylo
153 PhyloParam { _phyloParam_version :: Text
154 , _phyloParam_software :: Software
155 , _phyloParam_config :: Config
156 } deriving (Generic, Show, Eq)
158 defaultPhyloParam :: PhyloParam
160 PhyloParam { _phyloParam_version = pack "v2.adaptative"
161 , _phyloParam_software = defaultSoftware
162 , _phyloParam_config = defaultConfig }
170 -- | Date : a simple Integer
173 -- | Ngrams : a contiguous sequence of n terms
176 -- | Document : a piece of Text linked to a Date
177 data Document = Document
180 } deriving (Eq,Show,Generic,NFData)
188 -- | The Foundations of a Phylo created from a given TermList
189 data PhyloFoundations = PhyloFoundations
190 { _foundations_roots :: !(Vector Ngrams)
191 , _foundations_mapList :: TermList
192 } deriving (Generic, Show, Eq)
195 ---------------------------
196 -- | Coocurency Matrix | --
197 ---------------------------
200 -- | Cooc : a coocurency matrix between two ngrams
201 type Cooc = Map (Int,Int) Double
209 -- | Phylo datatype of a phylomemy
210 -- foundations : the foundations of the phylo
211 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
212 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
213 -- param : the parameters of the phylomemy (with the user's configuration)
214 -- periods : the temporal steps of a phylomemy
216 Phylo { _phylo_foundations :: PhyloFoundations
217 , _phylo_timeCooc :: !(Map Date Cooc)
218 , _phylo_timeDocs :: !(Map Date Double)
219 , _phylo_param :: PhyloParam
220 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
222 deriving (Generic, Show, Eq)
225 -- | PhyloPeriodId : the id of a given period
226 type PhyloPeriodId = (Date,Date)
228 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
229 -- id: tuple (start date, end date) of the temporal step of the phylomemy
230 -- levels: levels of granularity
232 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
233 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
234 } deriving (Generic, Show, Eq)
237 -- | Level : a level of clustering
240 -- | PhyloLevelId : the id of a level of clustering in a given period
241 type PhyloLevelId = (PhyloPeriodId,Level)
243 -- | PhyloLevel : levels of phylomemy on a synchronic axis
244 -- Levels description:
245 -- Level 0: The foundations and the base of the phylo
246 -- Level 1: First level of clustering (the Fis)
247 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
249 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
250 , _phylo_levelLevel :: Level
251 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
253 deriving (Generic, Show, Eq)
256 type PhyloGroupId = (PhyloLevelId, Int)
258 -- | BranchId : (a level, a sequence of branch index)
259 -- the sequence is a path of heritage from the most to the less specific branch
260 type PhyloBranchId = (Level, [Int])
262 -- | PhyloGroup : group of ngrams at each level and period
264 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
265 , _phylo_groupLevel :: Level
266 , _phylo_groupIndex :: Int
267 , _phylo_groupLabel :: Text
268 , _phylo_groupSupport :: Support
269 , _phylo_groupNgrams :: [Int]
270 , _phylo_groupCooc :: !(Cooc)
271 , _phylo_groupBranchId :: PhyloBranchId
272 , _phylo_groupMeta :: Map Text [Double]
273 , _phylo_groupLevelParents :: [Pointer]
274 , _phylo_groupLevelChilds :: [Pointer]
275 , _phylo_groupPeriodParents :: [Pointer]
276 , _phylo_groupPeriodChilds :: [Pointer]
278 deriving (Generic, Show, Eq)
280 -- | Weight : A generic mesure that can be associated with an Id
283 -- | Pointer : A weighted pointer to a given PhyloGroup
284 type Pointer = (PhyloGroupId, Weight)
286 data Filiation = ToParents | ToChilds deriving (Generic, Show)
287 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
290 ---------------------------
291 -- | Frequent Item Set | --
292 ---------------------------
294 -- | Clique : Set of ngrams cooccurring in the same Document
295 type Clique = Set Ngrams
297 -- | Support : Number of Documents where a Clique occurs
300 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
301 data PhyloFis = PhyloFis
302 { _phyloFis_clique :: Clique
303 , _phyloFis_support :: Support
304 , _phyloFis_period :: (Date,Date)
305 } deriving (Generic,NFData,Show,Eq)
312 type DotId = TextLazy.Text
314 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
316 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
318 data Order = Asc | Desc deriving (Show,Generic,Eq)
320 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
322 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
326 { _branch_labelTagger :: Tagger
327 , _branch_labelSize :: Int }
329 { _group_labelTagger :: Tagger
330 , _group_labelSize :: Int }
331 deriving (Show,Generic,Eq)
335 { _branch_id :: PhyloBranchId
336 , _branch_label :: Text
337 , _branch_meta :: Map Text [Double]
338 } deriving (Generic, Show)
342 { _export_groups :: [PhyloGroup]
343 , _export_branches :: [PhyloBranch]
344 } deriving (Generic, Show)
351 makeLenses ''Proximity
352 makeLenses ''ContextualUnit
353 makeLenses ''PhyloLabel
354 makeLenses ''TimeUnit
355 makeLenses ''PhyloFoundations
356 makeLenses ''PhyloFis
358 makeLenses ''PhyloPeriod
359 makeLenses ''PhyloLevel
360 makeLenses ''PhyloGroup
361 makeLenses ''PhyloParam
362 makeLenses ''PhyloExport
363 makeLenses ''PhyloBranch
365 ------------------------
366 -- | JSON instances | --
367 ------------------------
370 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)