2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scales of
granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
54 Wos {_wos_limit :: Int}
55 | Csv {_csv_limit :: Int}
56 deriving (Show,Generic,Eq)
61 { _wlj_sensibility :: Double
62 , _wlj_thresholdInit :: Double
63 , _wlj_thresholdStep :: Double }
65 deriving (Show,Generic,Eq)
72 , _year_matchingFrame :: Int }
73 deriving (Show,Generic,Eq)
80 deriving (Show,Generic,Eq)
84 Config { corpusPath :: FilePath
85 , listPath :: FilePath
86 , outputPath :: FilePath
87 , corpusParser :: CorpusParser
90 , phyloProximity :: Proximity
91 , timeUnit :: TimeUnit
92 , contextualUnit :: ContextualUnit
94 } deriving (Show,Generic,Eq)
97 defaultConfig :: Config
99 Config { corpusPath = ""
102 , corpusParser = Csv 1000
103 , phyloName = pack "Default Phylo"
105 , phyloProximity = WeightedLogJaccard 10 0 0.05
106 , timeUnit = Year 3 1 5
107 , contextualUnit = Fis 2 4
-- JSON decoding instances for the configuration types.
-- Every instance body is empty, so the class default methods are used.
instance FromJSON Config
instance FromJSON CorpusParser
instance FromJSON Proximity
instance FromJSON TimeUnit
instance FromJSON ContextualUnit

-- JSON encoding instances for the same types, likewise relying on the
-- class default methods.
instance ToJSON Config
instance ToJSON CorpusParser
instance ToJSON Proximity
instance ToJSON TimeUnit
instance ToJSON ContextualUnit
123 -- | Software parameters
125 Software { _software_name :: Text
126 , _software_version :: Text
127 } deriving (Generic, Show, Eq)
129 defaultSoftware :: Software
131 Software { _software_name = pack "Gargantext"
132 , _software_version = pack "v4" }
135 -- | Global parameters of a Phylo
137 PhyloParam { _phyloParam_version :: Text
138 , _phyloParam_software :: Software
139 , _phyloParam_config :: Config
140 } deriving (Generic, Show, Eq)
142 defaultPhyloParam :: PhyloParam
144 PhyloParam { _phyloParam_version = pack "v2.adaptative"
145 , _phyloParam_software = defaultSoftware
146 , _phyloParam_config = defaultConfig }
154 -- | Date : a simple Integer
157 -- | Ngrams : a contiguous sequence of n terms
160 -- | Document : a piece of Text linked to a Date
161 data Document = Document
164 } deriving (Eq,Show,Generic,NFData)
-- | The foundations of a Phylo, created from a given 'TermList'.
data PhyloFoundations =
  PhyloFoundations
    { _foundations_roots   :: !(Vector Ngrams)
      -- ^ strict field: a 'Vector' of 'Ngrams'
    , _foundations_mapList :: TermList
      -- ^ lazy field; presumably the 'TermList' the foundations were built
      --   from (to be confirmed against the construction code)
    }
  deriving (Generic, Show, Eq)
-----------------------------
-- | Cooccurrence Matrix | --
-----------------------------
-- | Cooc : a cooccurrence matrix between two ngrams, represented as a 'Map'
-- from a pair of 'Int' to a 'Double'.
type Cooc = Map (Int, Int) Double
-- | Phylo datatype of a phylomemy
--  foundations : the foundations of the phylo
--  timeCooc : a Map of cooccurrences by minimal unit of time (ex: by year)
--  timeDocs : a Map with the number of docs by minimal unit of time (ex: by year)
--  param : the parameters of the phylomemy (with the user's configuration)
--  periods : the temporal steps of a phylomemy
200 Phylo { _phylo_foundations :: PhyloFoundations
201 , _phylo_timeCooc :: !(Map Date Cooc)
202 , _phylo_timeDocs :: !(Map Date Double)
203 , _phylo_param :: PhyloParam
204 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
206 deriving (Generic, Show, Eq)
-- | PhyloPeriodId : the identifier of a given period, a pair of 'Date'.
type PhyloPeriodId = (Date, Date)
212 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
213 -- id: tuple (start date, end date) of the temporal step of the phylomemy
214 -- levels: levels of granularity
216 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
217 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
218 } deriving (Generic, Show, Eq)
221 -- | Level : a level of clustering
-- | PhyloLevelId : the identifier of a level of clustering in a given period,
-- i.e. a 'PhyloPeriodId' paired with a 'Level'.
type PhyloLevelId = (PhyloPeriodId, Level)
227 -- | PhyloLevel : levels of phylomemy on a synchronic axis
228 -- Levels description:
229 -- Level 0: The foundations and the base of the phylo
230 -- Level 1: First level of clustering (the Fis)
231 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
233 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
234 , _phylo_levelLevel :: Level
235 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
237 deriving (Generic, Show, Eq)
-- | PhyloGroupId : a 'PhyloLevelId' paired with an 'Int'
-- (presumably the index of the group within its level; to be confirmed).
type PhyloGroupId = (PhyloLevelId, Int)
-- | PhyloBranchId : (a level, a sequence of branch indexes).
-- The sequence is a path of heritage from the most to the least specific
-- branch.
type PhyloBranchId = (Level, [Int])
246 -- | PhyloGroup : group of ngrams at each level and period
248 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
249 , _phylo_groupLevel :: Level
250 , _phylo_groupIndex :: Int
251 , _phylo_groupSupport :: Support
252 , _phylo_groupNgrams :: [Int]
253 , _phylo_groupCooc :: !(Cooc)
254 , _phylo_groupBranchId :: PhyloBranchId
255 , _phylo_groupLevelParents :: [Pointer]
256 , _phylo_groupLevelChilds :: [Pointer]
257 , _phylo_groupPeriodParents :: [Pointer]
258 , _phylo_groupPeriodChilds :: [Pointer]
259 , _phylo_groupGhostPointers :: [Pointer]
261 deriving (Generic, Show, Eq)
-- | Weight : A generic measure that can be associated with an Id
-- | Pointer : a weighted pointer to a given PhyloGroup, i.e. the
-- 'PhyloGroupId' of the target together with a 'Weight'.
type Pointer = (PhyloGroupId, Weight)
-- | Link : a pair of 'PhyloGroupId' together with a 'Weight'.
type Link =
  ( (PhyloGroupId, PhyloGroupId)
  , Weight
  )
-- | Filiation : one of two alternatives, 'ToParents' or 'ToChilds'.
data Filiation
  = ToParents
  | ToChilds
  deriving (Generic, Show)
-- | PointerType : one of two alternatives, 'TemporalPointer' or
-- 'LevelPointer'.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Generic, Show)
275 ---------------------------
276 -- | Frequent Item Set | --
277 ---------------------------
-- | Clique : a 'Set' of 'Ngrams' cooccurring in the same Document.
type Clique = Set Ngrams
282 -- | Support : Number of Documents where a Clique occurs
-- | Fis : Frequent Item Set, i.e. the association between a 'Clique' and a
-- 'Support', recorded together with a pair of dates.
data PhyloFis =
  PhyloFis
    { _phyloFis_clique  :: Clique
      -- ^ the clique of the item set
    , _phyloFis_support :: Support
      -- ^ the support associated with the clique
    , _phyloFis_period  :: (Date, Date)
      -- ^ a pair of dates (presumably start and end of the period; to be
      --   confirmed)
    }
  deriving (Generic, NFData, Show, Eq)
-- Template Haskell declaration splices calling 'makeLenses' on each record
-- type below. The order of the splices is kept as is, since top-level splices
-- delimit declaration groups.
$(makeLenses ''Proximity)
$(makeLenses ''ContextualUnit)
$(makeLenses ''TimeUnit)
$(makeLenses ''PhyloFoundations)
$(makeLenses ''PhyloFis)
$(makeLenses ''PhyloPeriod)
$(makeLenses ''PhyloLevel)
$(makeLenses ''PhyloGroup)
$(makeLenses ''PhyloParam)
------------------------
-- | JSON instances | --
------------------------

-- Template Haskell splice calling 'deriveJSON' on 'PhyloFoundations', with
-- options built by @unPrefix "_foundations_"@.
$(deriveJSON (unPrefix "_foundations_") ''PhyloFoundations)