2 Module : Gargantext.Viz.AdaptativePhylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
24 {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE TemplateHaskell #-}
27 {-# LANGUAGE MultiParamTypeClasses #-}
29 module Gargantext.Viz.AdaptativePhylo where
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Text (Text, pack)
34 import Data.Vector (Vector)
38 import Gargantext.Core.Utils.Prefix (unPrefix)
39 import Gargantext.Prelude
40 import Gargantext.Text.Context (TermList)
43 import GHC.IO (FilePath)
44 import Control.DeepSeq (NFData)
45 import Control.Lens (makeLenses)
47 import qualified Data.Text.Lazy as TextLazy
56 Wos {_wos_limit :: Int}
57 | Csv {_csv_limit :: Int}
58 deriving (Show,Generic,Eq)
63 { _wlj_sensibility :: Double
64 , _wlj_thresholdInit :: Double
65 , _wlj_thresholdStep :: Double }
67 deriving (Show,Generic,Eq)
72 { _bpt_threshold :: Double
73 , _bpt_sensibility :: Double}
74 | ByProximityDistribution
75 { _bpd_sensibility :: Double}
76 deriving (Show,Generic,Eq)
83 , _year_matchingFrame :: Int }
84 deriving (Show,Generic,Eq)
92 { _clique_size :: Int }
93 deriving (Show,Generic,Eq)
97 Quality { _qua_granularity :: Double
98 , _qua_minBranch :: Int }
99 deriving (Show,Generic,Eq)
103 Config { corpusPath :: FilePath
104 , listPath :: FilePath
105 , outputPath :: FilePath
106 , corpusParser :: CorpusParser
109 , phyloProximity :: Proximity
110 , phyloSynchrony :: Synchrony
111 , phyloQuality :: Quality
112 , timeUnit :: TimeUnit
113 , contextualUnit :: ContextualUnit
114 , exportLabel :: [PhyloLabel]
116 , exportFilter :: [Filter]
117 } deriving (Show,Generic,Eq)
120 defaultConfig :: Config
122 Config { corpusPath = ""
125 , corpusParser = Csv 1000
126 , phyloName = pack "Default Phylo"
128 , phyloProximity = WeightedLogJaccard 10 0 0.1
129 , phyloSynchrony = ByProximityDistribution 0
130 , phyloQuality = Quality 0.5 1
131 , timeUnit = Year 3 1 5
132 , contextualUnit = Fis 1 5
133 , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
134 , exportSort = ByHierarchy
135 , exportFilter = [ByBranchSize 2]
138 instance FromJSON Config
139 instance ToJSON Config
140 instance FromJSON CorpusParser
141 instance ToJSON CorpusParser
142 instance FromJSON Proximity
143 instance ToJSON Proximity
144 instance FromJSON TimeUnit
145 instance ToJSON TimeUnit
146 instance FromJSON ContextualUnit
147 instance ToJSON ContextualUnit
148 instance FromJSON PhyloLabel
149 instance ToJSON PhyloLabel
150 instance FromJSON Tagger
151 instance ToJSON Tagger
152 instance FromJSON Sort
154 instance FromJSON Order
155 instance ToJSON Order
156 instance FromJSON Filter
157 instance ToJSON Filter
158 instance FromJSON Synchrony
159 instance ToJSON Synchrony
160 instance FromJSON Quality
161 instance ToJSON Quality
164 -- | Software parameters
166 Software { _software_name :: Text
167 , _software_version :: Text
168 } deriving (Generic, Show, Eq)
170 defaultSoftware :: Software
172 Software { _software_name = pack "Gargantext"
173 , _software_version = pack "v4" }
176 -- | Global parameters of a Phylo
178 PhyloParam { _phyloParam_version :: Text
179 , _phyloParam_software :: Software
180 , _phyloParam_config :: Config
181 } deriving (Generic, Show, Eq)
183 defaultPhyloParam :: PhyloParam
185 PhyloParam { _phyloParam_version = pack "v2.adaptative"
186 , _phyloParam_software = defaultSoftware
187 , _phyloParam_config = defaultConfig }
195 -- | Date : a simple Integer
198 -- | Ngrams : a contiguous sequence of n terms
201 -- | Document : a piece of Text linked to a Date
202 data Document = Document
205 } deriving (Eq,Show,Generic,NFData)
213 -- | The Foundations of a Phylo created from a given TermList
214 data PhyloFoundations = PhyloFoundations
215 { _foundations_roots :: !(Vector Ngrams)
216 , _foundations_mapList :: TermList
217 } deriving (Generic, Show, Eq)
220 ---------------------------
221 -- | Coocurency Matrix | --
222 ---------------------------
225 -- | Cooc : a coocurency matrix between two ngrams
226 type Cooc = Map (Int,Int) Double
234 -- | Phylo datatype of a phylomemy
235 -- foundations : the foundations of the phylo
236 -- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
237 -- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
238 -- param : the parameters of the phylomemy (with the user's configuration)
239 -- periods : the temporal steps of a phylomemy
241 Phylo { _phylo_foundations :: PhyloFoundations
242 , _phylo_timeCooc :: !(Map Date Cooc)
243 , _phylo_timeDocs :: !(Map Date Double)
244 , _phylo_termFreq :: !(Map Int Double)
245 , _phylo_param :: PhyloParam
246 , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
248 deriving (Generic, Show, Eq)
251 -- | PhyloPeriodId : the id of a given period
252 type PhyloPeriodId = (Date,Date)
254 -- | PhyloPeriod : steps of a phylomemy on a temporal axis
255 -- id: tuple (start date, end date) of the temporal step of the phylomemy
256 -- levels: levels of granularity
258 PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
259 , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
260 } deriving (Generic, Show, Eq)
263 -- | Level : a level of clustering
266 -- | PhyloLevelId : the id of a level of clustering in a given period
267 type PhyloLevelId = (PhyloPeriodId,Level)
269 -- | PhyloLevel : levels of phylomemy on a synchronic axis
270 -- Levels description:
271 -- Level 0: The foundations and the base of the phylo
272 -- Level 1: First level of clustering (the Fis)
273 -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
275 PhyloLevel { _phylo_levelPeriod :: (Date,Date)
276 , _phylo_levelLevel :: Level
277 , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
279 deriving (Generic, Show, Eq)
282 type PhyloGroupId = (PhyloLevelId, Int)
284 -- | BranchId : (a level, a sequence of branch index)
285 -- the sequence is a path of heritage from the most to the less specific branch
286 type PhyloBranchId = (Level, [Int])
288 -- | PhyloGroup : group of ngrams at each level and period
290 PhyloGroup { _phylo_groupPeriod :: (Date,Date)
291 , _phylo_groupLevel :: Level
292 , _phylo_groupIndex :: Int
293 , _phylo_groupLabel :: Text
294 , _phylo_groupSupport :: Support
295 , _phylo_groupNgrams :: [Int]
296 , _phylo_groupCooc :: !(Cooc)
297 , _phylo_groupBranchId :: PhyloBranchId
298 , _phylo_groupMeta :: Map Text [Double]
299 , _phylo_groupLevelParents :: [Pointer]
300 , _phylo_groupLevelChilds :: [Pointer]
301 , _phylo_groupPeriodParents :: [Pointer]
302 , _phylo_groupPeriodChilds :: [Pointer]
304 deriving (Generic, Show, Eq, NFData)
306 -- | Weight : A generic mesure that can be associated with an Id
309 -- | Pointer : A weighted pointer to a given PhyloGroup
310 type Pointer = (PhyloGroupId, Weight)
312 data Filiation = ToParents | ToChilds deriving (Generic, Show)
313 data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
316 ---------------------------
317 -- | Frequent Item Set | --
318 ---------------------------
320 -- | Support : Number of Documents where a Clique occurs
323 data PhyloCUnit = PhyloCUnit
324 { _phyloCUnit_nodes :: Set Ngrams
325 , _phyloCUnit_support :: Support
326 , _phyloCUnit_period :: (Date,Date)
327 } deriving (Generic,NFData,Show,Eq)
334 type DotId = TextLazy.Text
336 data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
338 data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
340 data Order = Asc | Desc deriving (Show,Generic,Eq)
342 data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
344 data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
348 { _branch_labelTagger :: Tagger
349 , _branch_labelSize :: Int }
351 { _group_labelTagger :: Tagger
352 , _group_labelSize :: Int }
353 deriving (Show,Generic,Eq)
357 { _branch_id :: PhyloBranchId
358 , _branch_label :: Text
359 , _branch_meta :: Map Text [Double]
360 } deriving (Generic, Show)
364 { _export_groups :: [PhyloGroup]
365 , _export_branches :: [PhyloBranch]
366 } deriving (Generic, Show)
373 makeLenses ''Proximity
375 makeLenses ''ContextualUnit
376 makeLenses ''PhyloLabel
377 makeLenses ''TimeUnit
378 makeLenses ''PhyloFoundations
379 makeLenses ''PhyloCUnit
381 makeLenses ''PhyloPeriod
382 makeLenses ''PhyloLevel
383 makeLenses ''PhyloGroup
384 makeLenses ''PhyloParam
385 makeLenses ''PhyloExport
386 makeLenses ''PhyloBranch
388 ------------------------
389 -- | JSON instances | --
390 ------------------------
393 $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)