2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
72 { _bpt_threshold :: Double
73 , _bpt_sensibility :: Double}
74 | ByProximityDistribution
75 { _bpd_sensibility :: Double}
76 deriving (Show,Generic,Eq)
83 , _year_matchingFrame :: Int }
84 deriving (Show,Generic,Eq)
91 deriving (Show,Generic,Eq)
95 Quality { _qua_relevance :: Double
96 , _qua_minBranch :: Int }
97 deriving (Show,Generic,Eq)
101 Config { corpusPath :: FilePath
102 , listPath :: FilePath
103 , outputPath :: FilePath
104 , corpusParser :: CorpusParser
107 , phyloProximity :: Proximity
108 , phyloSynchrony :: Synchrony
109 , phyloQuality :: Quality
110 , timeUnit :: TimeUnit
111 , contextualUnit :: ContextualUnit
112 , exportLabel :: [PhyloLabel]
114 , exportFilter :: [Filter]
115 } deriving (Show,Generic,Eq)
118 defaultConfig :: Config
120 Config { corpusPath = ""
123 , corpusParser = Csv 1000
124 , phyloName = pack "Default Phylo"
126 , phyloProximity = WeightedLogJaccard 10 0 0.1
127 , phyloSynchrony = ByProximityDistribution 0
128 , phyloQuality = Quality 10 3
129 , timeUnit = Year 3 1 5
130 , contextualUnit = Fis 1 5
131 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
132 , exportSort = ByHierarchy
133 , exportFilter = [ByBranchSize 2]
136 instance FromJSON Config
137 instance ToJSON Config
138 instance FromJSON CorpusParser
139 instance ToJSON CorpusParser
140 instance FromJSON Proximity
141 instance ToJSON Proximity
142 instance FromJSON TimeUnit
143 instance ToJSON TimeUnit
144 instance FromJSON ContextualUnit
145 instance ToJSON ContextualUnit
146 instance FromJSON PhyloLabel
147 instance ToJSON PhyloLabel
148 instance FromJSON Tagger
149 instance ToJSON Tagger
150 instance FromJSON Sort
152 instance FromJSON Order
153 instance ToJSON Order
154 instance FromJSON Filter
155 instance ToJSON Filter
156 instance FromJSON Synchrony
157 instance ToJSON Synchrony
158 instance FromJSON Quality
159 instance ToJSON Quality
162 -- | Software parameters
164 Software { _software_name :: Text
165 , _software_version :: Text
166 } deriving (Generic, Show, Eq)
168 defaultSoftware :: Software
170 Software { _software_name = pack "Gargantext"
171 , _software_version = pack "v4" }
174 -- | Global parameters of a Phylo
176 PhyloParam { _phyloParam_version :: Text
177 , _phyloParam_software :: Software
178 , _phyloParam_config :: Config
179 } deriving (Generic, Show, Eq)
181 defaultPhyloParam :: PhyloParam
183 PhyloParam { _phyloParam_version = pack "v2.adaptative"
184 , _phyloParam_software = defaultSoftware
185 , _phyloParam_config = defaultConfig }
193 -- | Date : a simple Integer
196 -- | Ngrams : a contiguous sequence of n terms
199 -- | Document : a piece of Text linked to a Date
200 data Document = Document
203 } deriving (Eq,Show,Generic,NFData)
211 -- | The Foundations of a Phylo created from a given TermList
212 data PhyloFoundations = PhyloFoundations
213 { _foundations_roots :: !(Vector Ngrams)
214 , _foundations_mapList :: TermList
215 } deriving (Generic, Show, Eq)
218 ---------------------------
219 -- | Coocurency Matrix | --
220 ---------------------------
223 -- | Cooc : a coocurency matrix between two ngrams
224 type Cooc = Map (Int,Int) Double
232 -- | Phylo datatype of a phylomemy
233 -- foundations : the foundations of the phylo
234 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
235 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
236 -- param : the parameters of the phylomemy (with the user's configuration)
237 -- periods : the temporal steps of a phylomemy
239 Phylo { _phylo_foundations :: PhyloFoundations
240 , _phylo_timeCooc :: !(Map Date Cooc)
241 , _phylo_timeDocs :: !(Map Date Double)
242 , _phylo_termFreq :: !(Map Int Double)
243 , _phylo_param :: PhyloParam
244 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
246 deriving (Generic, Show, Eq)
249 -- | PhyloPeriodId : the id of a given period
250 type PhyloPeriodId = (Date,Date)
252 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
253 -- id: tuple (start date, end date) of the temporal step of the phylomemy
254 -- levels: levels of granularity
256 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
257 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
258 } deriving (Generic, Show, Eq)
261 -- | Level : a level of clustering
264 -- | PhyloLevelId : the id of a level of clustering in a given period
265 type PhyloLevelId = (PhyloPeriodId,Level)
267 -- | PhyloLevel : levels of phylomemy on a synchronic axis
268 -- Levels description:
269 -- Level 0: The foundations and the base of the phylo
270 -- Level 1: First level of clustering (the Fis)
271 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
273 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
274 , _phylo_levelLevel :: Level
275 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
277 deriving (Generic, Show, Eq)
280 type PhyloGroupId = (PhyloLevelId, Int)
282 -- | BranchId : (a level, a sequence of branch index)
283 -- the sequence is a path of heritage from the most to the less specific branch
284 type PhyloBranchId = (Level, [Int])
286 -- | PhyloGroup : group of ngrams at each level and period
288 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
289 , _phylo_groupLevel :: Level
290 , _phylo_groupIndex :: Int
291 , _phylo_groupLabel :: Text
292 , _phylo_groupSupport :: Support
293 , _phylo_groupNgrams :: [Int]
294 , _phylo_groupCooc :: !(Cooc)
295 , _phylo_groupBranchId :: PhyloBranchId
296 , _phylo_groupMeta :: Map Text [Double]
297 , _phylo_groupLevelParents :: [Pointer]
298 , _phylo_groupLevelChilds :: [Pointer]
299 , _phylo_groupPeriodParents :: [Pointer]
300 , _phylo_groupPeriodChilds :: [Pointer]
302 deriving (Generic, Show, Eq, NFData)
304 -- | Weight : A generic mesure that can be associated with an Id
307 -- | Pointer : A weighted pointer to a given PhyloGroup
308 type Pointer = (PhyloGroupId, Weight)
310 data Filiation = ToParents | ToChilds deriving (Generic, Show)
311 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
314 ---------------------------
315 -- | Frequent Item Set | --
316 ---------------------------
318 -- | Clique : Set of ngrams cooccurring in the same Document
319 type Clique = Set Ngrams
321 -- | Support : Number of Documents where a Clique occurs
324 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
325 data PhyloFis = PhyloFis
326 { _phyloFis_clique :: Clique
327 , _phyloFis_support :: Support
328 , _phyloFis_period :: (Date,Date)
329 } deriving (Generic,NFData,Show,Eq)
336 type DotId = TextLazy.Text
338 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
340 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
342 data Order = Asc | Desc deriving (Show,Generic,Eq)
344 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
346 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
350 { _branch_labelTagger :: Tagger
351 , _branch_labelSize :: Int }
353 { _group_labelTagger :: Tagger
354 , _group_labelSize :: Int }
355 deriving (Show,Generic,Eq)
359 { _branch_id :: PhyloBranchId
360 , _branch_label :: Text
361 , _branch_meta :: Map Text [Double]
362 } deriving (Generic, Show)
366 { _export_groups :: [PhyloGroup]
367 , _export_branches :: [PhyloBranch]
368 } deriving (Generic, Show)
375 makeLenses ''Proximity
377 makeLenses ''ContextualUnit
378 makeLenses ''PhyloLabel
379 makeLenses ''TimeUnit
380 makeLenses ''PhyloFoundations
381 makeLenses ''PhyloFis
383 makeLenses ''PhyloPeriod
384 makeLenses ''PhyloLevel
385 makeLenses ''PhyloGroup
386 makeLenses ''PhyloParam
387 makeLenses ''PhyloExport
388 makeLenses ''PhyloBranch
390 ------------------------
391 -- | JSON instances | --
392 ------------------------
395 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)