2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scales of
granularity of groups of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
-- | CorpusParser : the corpus formats that can be selected through the
-- @corpusParser@ field of the configuration record.
data CorpusParser
  = Wos
  | Csv
  deriving (Show, Generic, Eq)
55 data Proximity = WeightedLogJaccard {_sensibility :: Double}
57 deriving (Show,Generic,Eq)
60 Config { corpusPath :: FilePath
61 , listPath :: FilePath
62 , outputPath :: FilePath
63 , corpusParser :: CorpusParser
67 , phyloProximity :: Proximity
75 } deriving (Show,Generic,Eq)
77 defaultConfig :: Config
79 Config { corpusPath = ""
84 , phyloName = pack "Default Phylo"
86 , phyloProximity = WeightedLogJaccard 10
-- JSON (de)serialisation for the configuration types. The instance bodies
-- are empty, so every method comes from the class defaults.
instance ToJSON   CorpusParser
instance FromJSON CorpusParser
instance ToJSON   Proximity
instance FromJSON Proximity
instance ToJSON   Config
instance FromJSON Config
104 -- | Software parameters
106 Software { _software_name :: Text
107 , _software_version :: Text
108 } deriving (Generic, Show, Eq)
110 defaultSoftware :: Software
112 Software { _software_name = pack "Gargantext"
113 , _software_version = pack "v4" }
116 -- | Global parameters of a Phylo
118 PhyloParam { _phyloParam_version :: Text
119 , _phyloParam_software :: Software
120 , _phyloParam_config :: Config
121 } deriving (Generic, Show, Eq)
123 defaultPhyloParam :: PhyloParam
125 PhyloParam { _phyloParam_version = pack "v2.adaptative"
126 , _phyloParam_software = defaultSoftware
127 , _phyloParam_config = defaultConfig }
135 -- | Date : a simple Integer
138 -- | Ngrams : a contiguous sequence of n terms
141 -- | Document : a piece of Text linked to a Date
142 data Document = Document
145 } deriving (Eq,Show,Generic,NFData)
-- | PhyloFoundations : the foundations of a Phylo, created from a given
-- 'TermList'.
--
-- The @_foundations_@ prefix of the field names is relied upon by the lens
-- generation and by the JSON derivation declared for this type.
data PhyloFoundations =
  PhyloFoundations
    { _foundations_roots   :: !(Vector Ngrams)  -- ^ strict field: a vector of ngrams
    , _foundations_mapList :: TermList          -- ^ the term list (lazy field)
    }
  deriving (Generic, Show, Eq)
-----------------------------
-- | Cooccurrence Matrix | --
-----------------------------
-- | Cooc : a cooccurrence matrix between two ngrams, represented as a map
-- from a pair of 'Int' keys (presumably the ngram indices) to a 'Double'
type Cooc = Map (Int, Int) Double
174 -- | Phylo datatype of a phylomemy
175 -- foundations : the foundations of the phylo
-- timeCooc : a Map of cooccurrences by minimal unit of time (ex: by year)
-- timeDocs : a Map with the number of docs by minimal unit of time (ex: by year)
178 -- param : the parameters of the phylomemy (with the user's configuration)
179 -- periods : the temporal steps of a phylomemy
181 Phylo { _phylo_foundations :: PhyloFoundations
182 , _phylo_timeCooc :: !(Map Date Cooc)
183 , _phylo_timeDocs :: !(Map Date Double)
184 , _phylo_param :: PhyloParam
185 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
187 deriving (Generic, Show, Eq)
-- | PhyloPeriodId : identifier of a given period, expressed as a pair of
-- 'Date's
type PhyloPeriodId = (Date, Date)
193 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
194 -- id: tuple (start date, end date) of the temporal step of the phylomemy
195 -- levels: levels of granularity
197 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
198 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
199 } deriving (Generic, Show, Eq)
202 -- | Level : a level of clustering
-- | PhyloLevelId : identifier of a level of clustering inside a given
-- period, i.e. the period id paired with the 'Level'
type PhyloLevelId = (PhyloPeriodId, Level)
208 -- | PhyloLevel : levels of phylomemy on a synchronic axis
209 -- Levels description:
210 -- Level 0: The foundations and the base of the phylo
211 -- Level 1: First level of clustering (the Fis)
212 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
214 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
215 , _phylo_levelLevel :: Level
216 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
218 deriving (Generic, Show, Eq)
-- | PhyloGroupId : identifier of a group, i.e. the id of its level paired
-- with an 'Int' (presumably the index of the group -- to be confirmed)
type PhyloGroupId = (PhyloLevelId, Int)
-- | PhyloBranchId : a level paired with a sequence of branch indices;
-- the sequence is the path of heritage from the most to the least
-- specific branch
type PhyloBranchId = (Level, [Int])
227 -- | PhyloGroup : group of ngrams at each level and period
229 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
230 , _phylo_groupLevel :: Level
231 , _phylo_groupIndex :: Int
232 , _phylo_groupSupport :: Support
233 , _phylo_groupNgrams :: [Int]
234 , _phylo_groupCooc :: !(Cooc)
235 , _phylo_groupBranchId :: PhyloBranchId
236 , _phylo_groupLevelParents :: [Pointer]
237 , _phylo_groupLevelChilds :: [Pointer]
238 , _phylo_groupPeriodParents :: [Pointer]
239 , _phylo_groupPeriodChilds :: [Pointer]
240 , _phylo_groupBreakPointer :: Maybe Pointer
242 deriving (Generic, Show, Eq)
-- | Weight : A generic measure that can be associated with an Id
-- | Pointer : a weighted reference to a PhyloGroup, i.e. the id of the
-- target group paired with a 'Weight'
type Pointer = (PhyloGroupId, Weight)
-- | Link : a pair of PhyloGroup ids together with a 'Weight'
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
-- | Filiation : the direction of a filiation, either towards the parents
-- or towards the childs.
--
-- 'Eq' is derived as for the other data types of this module, so that two
-- directions can be compared with @==@.
data Filiation
  = ToParents
  | ToChilds
  deriving (Generic, Show, Eq)
-- | PointerType : the kind of a pointer, either temporal or between levels
-- (presumably matching the period / level pointer fields of a group -- to
-- be confirmed against the callers).
--
-- 'Eq' is derived as for the other data types of this module, so that two
-- kinds can be compared with @==@.
data PointerType
  = TemporalPointer
  | LevelPointer
  deriving (Generic, Show, Eq)
256 ---------------------------
257 -- | Frequent Item Set | --
258 ---------------------------
-- | Clique : a 'Set' of ngrams that cooccur in the same Document
type Clique = Set Ngrams
263 -- | Support : Number of Documents where a Clique occurs
-- | Fis : a Frequent Item Set, i.e. the association between a 'Clique' and
-- a 'Support', together with a period given as a pair of 'Date's
data PhyloFis =
  PhyloFis
    { _phyloFis_clique  :: Clique        -- ^ the set of ngrams
    , _phyloFis_support :: Support       -- ^ the support associated with the clique
    , _phyloFis_period  :: (Date, Date)  -- ^ the period, as a pair of dates
    }
  deriving (Generic, NFData, Show, Eq)
-- Template Haskell: generate the lenses of the records listed below.
-- The splices are kept in their original order.
$(makeLenses ''PhyloFoundations)
$(makeLenses ''PhyloFis)
$(makeLenses ''PhyloPeriod)
$(makeLenses ''PhyloLevel)
$(makeLenses ''PhyloGroup)
$(makeLenses ''PhyloParam)
287 ------------------------
288 -- | JSON instances | --
289 ------------------------
-- Template Haskell: derive the JSON instances of 'PhyloFoundations', using
-- the options built by @unPrefix "_foundations_"@.
$( deriveJSON
     (unPrefix "_foundations_")
     ''PhyloFoundations
 )