]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Add the view types for exporting a phylo
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 ------------------------------------------------------------------------
46 data PhyloExport =
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
49 } deriving (Generic)
50
51 -- | .phylo parameters
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
56 } deriving (Generic)
57
58 type Hash = Text
59
60 -- | Software
61 -- TODO move somewhere since it is generic
62 data Software =
63 Software { _software_name :: Text
64 , _software_version :: Text
65 } deriving (Generic)
66
67 ------------------------------------------------------------------------
68
69 -- | Phylo datatype of a phylomemy
70 -- Duration : time Segment of the whole Phylo
71 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
72 -- Periods : list of all the periods of a Phylo
73 data Phylo =
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_foundations :: Vector Ngrams
76 , _phylo_periods :: [PhyloPeriod]
77 }
78 deriving (Generic, Show)
79
80
81 -- | Date : a simple Integer
82 type Date = Int
83
84 -- | UTCTime in seconds since UNIX epoch
85 -- type Start = POSIXTime
86 -- type End = POSIXTime
87 type Start = Date
88 type End = Date
89
90 -- | PhyloStep : steps of phylomemy on temporal axis
91 -- Period: tuple (start date, end date) of the step of the phylomemy
92 -- Levels: levels of granularity
93 data PhyloPeriod =
94 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
95 , _phylo_periodLevels :: [PhyloLevel]
96 }
97 deriving (Generic, Show)
98
99
100 -- | PhyloLevel : levels of phylomemy on level axis
101 -- Levels description:
102 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
103 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
104 -- Level 1: First level of clustering
105 -- Level N: Nth level of clustering
106 data PhyloLevel =
107 PhyloLevel { _phylo_levelId :: PhyloLevelId
108 , _phylo_levelGroups :: [PhyloGroup]
109 }
110 deriving (Generic, Show)
111
112
113 -- | PhyloGroup : group of ngrams at each level and step
114 -- Label : maybe has a label as text
115 -- Ngrams: set of terms that build the group
116 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
117 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
118 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
119 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
120 data PhyloGroup =
121 PhyloGroup { _phylo_groupId :: PhyloGroupId
122 , _phylo_groupLabel :: Text
123 , _phylo_groupNgrams :: [Int]
124 , _phylo_groupQuality :: Map Text Double
125 , _phylo_groupCooc :: Map (Int, Int) Double
126
127 , _phylo_groupPeriodParents :: [Pointer]
128 , _phylo_groupPeriodChilds :: [Pointer]
129
130 , _phylo_groupLevelParents :: [Pointer]
131 , _phylo_groupLevelChilds :: [Pointer]
132 }
133 deriving (Generic, Show, Eq, Ord)
134
135 data PhyloBranch =
136 PhyloBranch { _phylo_branchId :: (Level,Int)
137 , _phylo_branchLabel :: Text
138 , _phylo_branchGroups :: [PhyloGroupId]
139 }
140 deriving (Generic, Show)
141
142
143 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
144 type Level = Int
145 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
146 type Index = Int
147
148
149 type PhyloPeriodId = (Start, End)
150 type PhyloLevelId = (PhyloPeriodId, Level)
151 type PhyloGroupId = (PhyloLevelId, Index)
152 type PhyloBranchId = (Level, Index)
153
154
155 -- | Weight : A generic mesure that can be associated with an Id
156 type Weight = Double
157 -- | Pointer : A weighted linked with a given PhyloGroup
158 type Pointer = (PhyloGroupId, Weight)
159 -- | Ngrams : a contiguous sequence of n terms
160 type Ngrams = Text
161
162
163 -- | Clique : Set of ngrams cooccurring in the same Document
164 type Clique = Set Ngrams
165 -- | Support : Number of Documents where a Clique occurs
166 type Support = Int
167 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
168 type Fis = (Clique,Support)
169
170
171 -- | Document : a piece of Text linked to a Date
172 data Document = Document
173 { date :: Date
174 , text :: Text
175 } deriving (Show)
176
177
178 type Cluster = [PhyloGroup]
179
180
181 -- | A List of PhyloGroup in a PhyloGraph
182 type PhyloNodes = [PhyloGroup]
183 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
184 type PhyloEdges = [((PhyloGroup,PhyloGroup),Weight)]
185 -- | The association as a Graph between a list of Nodes and a list of Edges
186 type PhyloGraph = (PhyloNodes,PhyloEdges)
187
188
189 data PhyloError = LevelDoesNotExist
190 | LevelUnassigned
191 deriving (Show)
192
193
194 -- | A List of Proximity mesures or strategies
195 data Proximity = WeightedLogJaccard | Hamming | FromPairs
196 -- | A List of Clustering methods
197 data Clustering = Louvain | RelatedComponents
198
199 data PairTo = Childs | Parents
200
201 -- | Views type
202
203 data EdgeType = Directed | UnDirected
204
205 data ViewGraph = ViewGraph
206 { _view_graphParam :: PhyloParam
207 , _view_graphLabel :: Text
208 , _view_graphEdgeType :: EdgeType
209 , _view_graphBranches :: [(PhyloBranchId,Text)]
210 , _view_graphNodes :: [ViewNode]
211 , _view_graphEdges :: [ViewEdge]
212 }
213
214 data ViewEdge = ViewEdge
215 { _view_edgeSource :: PhyloGroupId
216 , _view_edgeTarget :: PhyloGroupId
217 , _view_edgeWeight :: Weight
218 }
219
220 data ViewNode = ViewNode
221 { _view_nodeId :: PhyloGroupId
222 , _view_nodeLabel :: Text
223 , _view_nodeNgrams :: [Ngrams]
224 , _view_nodeMeta :: Map Text Double
225 , _view_nodeParent :: PhyloGroupId
226 }
227
228 -- | Lenses
229 makeLenses ''Phylo
230 makeLenses ''PhyloParam
231 makeLenses ''PhyloExport
232 makeLenses ''Software
233 makeLenses ''PhyloGroup
234 makeLenses ''PhyloLevel
235 makeLenses ''PhyloPeriod
236 makeLenses ''PhyloBranch
237
238 -- | JSON instances
239 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
240 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
241 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
242 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
243 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
244 --
245 $(deriveJSON (unPrefix "_software_" ) ''Software )
246 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
247 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
248
249 -- | TODO XML instances
250