]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
add a toPhylo function and the foundation of the Rest routes
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 ------------------------------------------------------------------------
46 data PhyloQuery = PhyloQuery
47 { _phyloQuery_phyloName :: Text
48 , _phyloQuery_phyloDescription :: Text
49
50 , _phyloQuery_timeGrain :: Int
51 , _phyloQuery_timeSteps :: Int
52
53 , _phyloQuery_fstCluster :: Clustering
54 , _phyloQuery_timeMatching :: Proximity
55
56 , _phyloQuery_nthLevel :: Level
57 , _phyloQuery_nthCluster :: Clustering
58 } deriving (Show)
59
60
61 data PhyloExport =
62 PhyloExport { _phyloExport_param :: PhyloParam
63 , _phyloExport_data :: Phylo
64 } deriving (Generic, Show)
65
66 -- | .phylo parameters
67 data PhyloParam =
68 PhyloParam { _phyloParam_version :: Text -- Double ?
69 , _phyloParam_software :: Software
70 , _phyloParam_params :: Hash
71 , _phyloParam_query :: Maybe PhyloQuery
72 } deriving (Generic, Show)
73
74 type Hash = Text
75
76 -- | Software
77 -- TODO move somewhere since it is generic
78 data Software =
79 Software { _software_name :: Text
80 , _software_version :: Text
81 } deriving (Generic, Show)
82
83 ------------------------------------------------------------------------
84
85 -- | Phylo datatype of a phylomemy
86 -- Duration : time Segment of the whole Phylo
87 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
88 -- Periods : list of all the periods of a Phylo
89 data Phylo =
90 Phylo { _phylo_duration :: (Start, End)
91 , _phylo_foundations :: Vector Ngrams
92 , _phylo_periods :: [PhyloPeriod]
93 }
94 deriving (Generic, Show)
95
96
97 -- | Date : a simple Integer
98 type Date = Int
99
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
103 type Start = Date
104 type End = Date
105
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
109 data PhyloPeriod =
110 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
111 , _phylo_periodLevels :: [PhyloLevel]
112 }
113 deriving (Generic, Show)
114
115
116 -- | PhyloLevel : levels of phylomemy on level axis
117 -- Levels description:
118 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
119 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
120 -- Level 1: First level of clustering
121 -- Level N: Nth level of clustering
122 data PhyloLevel =
123 PhyloLevel { _phylo_levelId :: PhyloLevelId
124 , _phylo_levelGroups :: [PhyloGroup]
125 }
126 deriving (Generic, Show)
127
128
129 -- | PhyloGroup : group of ngrams at each level and step
130 -- Label : maybe has a label as text
131 -- Ngrams: set of terms that build the group
132 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
133 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
134 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
135 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
136 data PhyloGroup =
137 PhyloGroup { _phylo_groupId :: PhyloGroupId
138 , _phylo_groupLabel :: Text
139 , _phylo_groupNgrams :: [Int]
140 , _phylo_groupMeta :: Map Text Double
141 , _phylo_groupCooc :: Map (Int, Int) Double
142 , _phylo_groupBranchId :: Maybe PhyloBranchId
143
144 , _phylo_groupPeriodParents :: [Pointer]
145 , _phylo_groupPeriodChilds :: [Pointer]
146
147 , _phylo_groupLevelParents :: [Pointer]
148 , _phylo_groupLevelChilds :: [Pointer]
149 }
150 deriving (Generic, Show, Eq, Ord)
151
152
153 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
154 type Level = Int
155 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
156 type Index = Int
157
158
159 type PhyloPeriodId = (Start, End)
160 type PhyloLevelId = (PhyloPeriodId, Level)
161 type PhyloGroupId = (PhyloLevelId, Index)
162 type PhyloBranchId = (Level, Index)
163
164
165 -- | Weight : A generic mesure that can be associated with an Id
166 type Weight = Double
167 -- | Pointer : A weighted linked with a given PhyloGroup
168 type Pointer = (PhyloGroupId, Weight)
169 -- | Ngrams : a contiguous sequence of n terms
170 type Ngrams = Text
171
172
173 -- | Clique : Set of ngrams cooccurring in the same Document
174 type Clique = Set Ngrams
175 -- | Support : Number of Documents where a Clique occurs
176 type Support = Int
177 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
178 type Fis = (Clique,Support)
179
180
181 -- | Document : a piece of Text linked to a Date
182 data Document = Document
183 { date :: Date
184 , text :: Text
185 } deriving (Show)
186
187
188 type Cluster = [PhyloGroup]
189
190
191 -- | A List of PhyloGroup in a Graph
192 type GroupNodes = [PhyloGroup]
193 -- | A List of weighted links between some PhyloGroups in a Graph
194 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
195 -- | The association as a Graph between a list of Nodes and a list of Edges
196 type GroupGraph = (GroupNodes,GroupEdges)
197
198
199 data PhyloError = LevelDoesNotExist
200 | LevelUnassigned
201 deriving (Show)
202
203
204 -- | A List of Proximity methods names
205 data ProximityName = WeightedLogJaccard | Hamming | Filiation deriving (Show)
206 -- | A List of Clustering methods names
207 data ClusteringName = Louvain | RelatedComponents | FrequentItemSet deriving (Show)
208 -- | A constructor for Proximities
209 data Proximity = Proximity
210 { _proximity_name :: ProximityName
211 , _proximity_params :: Map Text Double
212 , _proximity_threshold :: Maybe Double } deriving (Show)
213 -- | A constructor for Clustering
214 data Clustering = Clustering
215 { _clustering_name :: ClusteringName
216 , _clustering_params :: Map Text Double
217 , _clustering_paramsBool :: Map Text Bool
218 , _clustering_proximity :: Maybe Proximity } deriving (Show)
219
220 ------------------------------------------------------------------------
221 -- | To export a Phylo | --
222
223
224 -- | PhyloView | --
225
226
227 data Filiation = Ascendant | Descendant | Complete deriving (Show)
228 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
229
230 data PhyloView = PhyloView
231 { _phylo_viewParam :: PhyloParam
232 , _phylo_viewLabel :: Text
233 , _phylo_viewDescription :: Text
234 , _phylo_viewFiliation :: Filiation
235 , _phylo_viewMeta :: Map Text Double
236 , _phylo_viewBranches :: [PhyloBranch]
237 , _phylo_viewNodes :: [PhyloNode]
238 , _phylo_viewEdges :: [PhyloEdge]
239 } deriving (Show)
240
241
242 data PhyloBranch = PhyloBranch
243 { _phylo_branchId :: PhyloBranchId
244 , _phylo_branchLabel :: Text
245 , _phylo_branchMeta :: Map Text Double
246 } deriving (Show)
247
248
249 data PhyloEdge = PhyloEdge
250 { _phylo_edgeSource :: PhyloGroupId
251 , _phylo_edgeTarget :: PhyloGroupId
252 , _phylo_edgeType :: EdgeType
253 , _phylo_edgeWeight :: Weight
254 } deriving (Show)
255
256
257 data PhyloNode = PhyloNode
258 { _phylo_nodeId :: PhyloGroupId
259 , _phylo_nodeBranchId :: Maybe PhyloBranchId
260 , _phylo_nodeLabel :: Text
261 , _phylo_nodeNgramsIdx :: [Int]
262 , _phylo_nodeNgrams :: Maybe [Ngrams]
263 , _phylo_nodeMeta :: Map Text Double
264 , _phylo_nodeParent :: Maybe PhyloGroupId
265 , _phylo_nodeChilds :: [PhyloNode]
266 } deriving (Show)
267
268 -- | PhyloQuery | --
269
270
271 data Filter = LonelyBranch
272 data Metric = BranchAge
273 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics
274
275
276 data Sort = ByBranchAge
277 data Order = Asc | Desc
278
279 data DisplayMode = Flat | Nested
280
281
282 -- | A query filter seen as : prefix && ((filter params)(clause))
283 data QueryFilter = QueryFilter
284 { _query_filter :: Filter
285 , _query_params :: [Double]
286 }
287
288
289 -- | A PhyloQueryView is the structured representation of a user query to be applied to a Phylo
290 data PhyloQueryView = PhyloQueryView
291 { _query_lvl :: Level
292
293 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
294 , _query_filiation :: Filiation
295
296 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
297 , _query_childs :: Bool
298 , _query_childsDepth :: Level
299
300 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
301 -- Firstly the metrics, then the filters and the taggers
302 , _query_metrics :: [Metric]
303 , _query_filters :: [QueryFilter]
304 , _query_taggers :: [Tagger]
305
306 -- An asc or desc sort to apply to the PhyloGraph
307 , _query_sort :: Maybe (Sort,Order)
308
309 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
310 , _query_display :: DisplayMode
311 , _query_verbose :: Bool
312 }
313
314
315 ------------------------------------------------------------------------
316 -- | Lenses and Json | --
317
318
319 -- | Lenses
320 makeLenses ''Phylo
321 makeLenses ''PhyloParam
322 makeLenses ''PhyloExport
323 makeLenses ''Software
324 makeLenses ''PhyloGroup
325 makeLenses ''PhyloLevel
326 makeLenses ''PhyloPeriod
327 makeLenses ''PhyloView
328 makeLenses ''PhyloQueryView
329 makeLenses ''PhyloBranch
330 makeLenses ''PhyloNode
331 makeLenses ''PhyloEdge
332 makeLenses ''Proximity
333 makeLenses ''Clustering
334 makeLenses ''QueryFilter
335 makeLenses ''PhyloQuery
336
337 -- | JSON instances
338 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
339 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
340 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
341 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
342 --
343 $(deriveJSON (unPrefix "_software_" ) ''Software )
344 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
345 $(deriveJSON (unPrefix "_clustering_" ) ''Clustering )
346 $(deriveJSON (unPrefix "_proximity_" ) ''Proximity )
347 $(deriveJSON (unPrefix "") ''ProximityName )
348 $(deriveJSON (unPrefix "") ''ClusteringName )
349 $(deriveJSON (unPrefix "_phyloQuery_" ) ''PhyloQuery )
350 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
351
352 -- | TODO XML instances
353