]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
add the Metrics and the Filters
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 ------------------------------------------------------------------------
46 data PhyloExport =
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
49 } deriving (Generic, Show)
50
51 -- | .phylo parameters
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
56 } deriving (Generic, Show)
57
58 type Hash = Text
59
60 -- | Software
61 -- TODO move somewhere since it is generic
62 data Software =
63 Software { _software_name :: Text
64 , _software_version :: Text
65 } deriving (Generic, Show)
66
67 ------------------------------------------------------------------------
68
69 -- | Phylo datatype of a phylomemy
70 -- Duration : time Segment of the whole Phylo
71 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
72 -- Periods : list of all the periods of a Phylo
73 data Phylo =
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_foundations :: Vector Ngrams
76 , _phylo_periods :: [PhyloPeriod]
77 }
78 deriving (Generic, Show)
79
80
81 -- | Date : a simple Integer
82 type Date = Int
83
84 -- | UTCTime in seconds since UNIX epoch
85 -- type Start = POSIXTime
86 -- type End = POSIXTime
87 type Start = Date
88 type End = Date
89
90 -- | PhyloStep : steps of phylomemy on temporal axis
91 -- Period: tuple (start date, end date) of the step of the phylomemy
92 -- Levels: levels of granularity
93 data PhyloPeriod =
94 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
95 , _phylo_periodLevels :: [PhyloLevel]
96 }
97 deriving (Generic, Show)
98
99
100 -- | PhyloLevel : levels of phylomemy on level axis
101 -- Levels description:
102 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
103 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
104 -- Level 1: First level of clustering
105 -- Level N: Nth level of clustering
106 data PhyloLevel =
107 PhyloLevel { _phylo_levelId :: PhyloLevelId
108 , _phylo_levelGroups :: [PhyloGroup]
109 }
110 deriving (Generic, Show)
111
112
113 -- | PhyloGroup : group of ngrams at each level and step
114 -- Label : maybe has a label as text
115 -- Ngrams: set of terms that build the group
116 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
117 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
118 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
119 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
120 data PhyloGroup =
121 PhyloGroup { _phylo_groupId :: PhyloGroupId
122 , _phylo_groupLabel :: Text
123 , _phylo_groupNgrams :: [Int]
124 , _phylo_groupMeta :: Map Text Double
125 , _phylo_groupCooc :: Map (Int, Int) Double
126 , _phylo_groupBranchId :: Maybe PhyloBranchId
127
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
130
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
133 }
134 deriving (Generic, Show, Eq, Ord)
135
136
137 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
138 type Level = Int
139 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
140 type Index = Int
141
142
143 type PhyloPeriodId = (Start, End)
144 type PhyloLevelId = (PhyloPeriodId, Level)
145 type PhyloGroupId = (PhyloLevelId, Index)
146 type PhyloBranchId = (Level, Index)
147
148
149 -- | Weight : A generic mesure that can be associated with an Id
150 type Weight = Double
151 -- | Pointer : A weighted linked with a given PhyloGroup
152 type Pointer = (PhyloGroupId, Weight)
153 -- | Ngrams : a contiguous sequence of n terms
154 type Ngrams = Text
155
156
157 -- | Clique : Set of ngrams cooccurring in the same Document
158 type Clique = Set Ngrams
159 -- | Support : Number of Documents where a Clique occurs
160 type Support = Int
161 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
162 type Fis = (Clique,Support)
163
164
165 -- | Document : a piece of Text linked to a Date
166 data Document = Document
167 { date :: Date
168 , text :: Text
169 } deriving (Show)
170
171
172 type Cluster = [PhyloGroup]
173
174
175 -- | A List of PhyloGroup in a Graph
176 type GroupNodes = [PhyloGroup]
177 -- | A List of weighted links between some PhyloGroups in a Graph
178 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
179 -- | The association as a Graph between a list of Nodes and a list of Edges
180 type GroupGraph = (GroupNodes,GroupEdges)
181
182
183 data PhyloError = LevelDoesNotExist
184 | LevelUnassigned
185 deriving (Show)
186
187
188 -- | A List of Proximity mesures or strategies
189 data Proximity = WeightedLogJaccard | Hamming | FromPairs
190 -- | A List of Clustering methods
191 data Clustering = Louvain | RelatedComponents
192
193 data PairTo = Childs | Parents
194
195 ------------------------------------------------------------------------
196 -- | To export a Phylo | --
197
198
199 -- | PhyloView | --
200
201
202 data Filiation = Ascendant | Descendant | Complete deriving (Show)
203 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
204
205 data PhyloView = PhyloView
206 { _phylo_viewParam :: PhyloParam
207 , _phylo_viewLabel :: Text
208 , _phylo_viewDescription :: Text
209 , _phylo_viewFiliation :: Filiation
210 , _phylo_viewMeta :: Map Text Double
211 , _phylo_viewBranches :: [PhyloBranch]
212 , _phylo_viewNodes :: [PhyloNode]
213 , _phylo_viewEdges :: [PhyloEdge]
214 } deriving (Show)
215
216
217 data PhyloBranch = PhyloBranch
218 { _phylo_branchId :: PhyloBranchId
219 , _phylo_branchLabel :: Text
220 , _phylo_branchMeta :: Map Text Double
221 } deriving (Show)
222
223
224 data PhyloEdge = PhyloEdge
225 { _phylo_edgeSource :: PhyloGroupId
226 , _phylo_edgeTarget :: PhyloGroupId
227 , _phylo_edgeType :: EdgeType
228 , _phylo_edgeWeight :: Weight
229 } deriving (Show)
230
231
232 data PhyloNode = PhyloNode
233 { _phylo_nodeId :: PhyloGroupId
234 , _phylo_nodeBranchId :: Maybe PhyloBranchId
235 , _phylo_nodeLabel :: Text
236 , _phylo_nodeNgramsIdx :: [Int]
237 , _phylo_nodeNgrams :: Maybe [Ngrams]
238 , _phylo_nodeMeta :: Map Text Double
239 , _phylo_nodeParent :: Maybe PhyloGroupId
240 } deriving (Show)
241
242 -- | PhyloQuery | --
243
244
245 data Filter = LonelyBranch
246 data Metric = BranchAge
247 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics
248
249
250 data Sort = ByBranchAge
251 data Order = Asc | Desc
252
253 data DisplayMode = Flat | Nested
254
255
256 -- | A query filter seen as : prefix && ((filter params)(clause))
257 data QueryFilter = QueryFilter
258 { _query_filter :: Filter
259 , _query_params :: [Double]
260 }
261
262
263 -- | A PhyloQuery is the structured representation of a user query to be applied to a Phylo
264 data PhyloQuery = PhyloQuery
265 { _query_lvl :: Level
266
267 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
268 , _query_filiation :: Filiation
269
270 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
271 , _query_childs :: Bool
272 , _query_childsDepth :: Level
273
274 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
275 -- Firstly the metrics, then the filters and the taggers
276 , _query_metrics :: [Metric]
277 , _query_filters :: [QueryFilter]
278 , _query_taggers :: [Tagger]
279
280 -- An asc or desc sort to apply to the PhyloGraph
281 , _query_sort :: Maybe (Sort,Order)
282
283 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
284 , _query_display :: DisplayMode
285 , _query_verbose :: Bool
286 }
287
288
289 ------------------------------------------------------------------------
290 -- | Lenses and Json | --
291
292
293 -- | Lenses
294 makeLenses ''Phylo
295 makeLenses ''PhyloParam
296 makeLenses ''PhyloExport
297 makeLenses ''Software
298 makeLenses ''PhyloGroup
299 makeLenses ''PhyloLevel
300 makeLenses ''PhyloPeriod
301 makeLenses ''PhyloView
302 makeLenses ''PhyloQuery
303 makeLenses ''PhyloBranch
304 makeLenses ''PhyloNode
305 makeLenses ''PhyloEdge
306 makeLenses ''QueryFilter
307
308 -- | JSON instances
309 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
310 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
311 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
312 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
313 --
314 $(deriveJSON (unPrefix "_software_" ) ''Software )
315 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
316 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
317
318 -- | TODO XML instances
319