]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Whooo class types ...
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 ------------------------------------------------------------------------
46 data PhyloExport =
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
49 } deriving (Generic)
50
51 -- | .phylo parameters
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
56 } deriving (Generic)
57
58 type Hash = Text
59
60 -- | Software
61 -- TODO move somewhere since it is generic
62 data Software =
63 Software { _software_name :: Text
64 , _software_version :: Text
65 } deriving (Generic)
66
67 ------------------------------------------------------------------------
68
69 -- | Phylo datatype descriptor of a phylomemy
70 -- Duration : time Segment of the whole phylomemy (start,end)
71 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
72 -- Steps : list of all steps to build the phylomemy
73 data Phylo =
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_ngrams :: PhyloNgrams
76 , _phylo_periods :: [PhyloPeriod]
77 , _phylo_branches :: [PhyloBranch]
78 }
79 deriving (Generic, Show)
80
81
82 -- | Date : a simple Integer
83 type Date = Int
84
85 -- | UTCTime in seconds since UNIX epoch
86 -- type Start = POSIXTime
87 -- type End = POSIXTime
88 type Start = Date
89 type End = Date
90
91 -- | PhyloStep : steps of phylomemy on temporal axis
92 -- Period: tuple (start date, end date) of the step of the phylomemy
93 -- Levels: levels of granularity
94 data PhyloPeriod =
95 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
96 , _phylo_periodLevels :: [PhyloLevel]
97 }
98 deriving (Generic, Show)
99
100
101 -- | PhyloLevel : levels of phylomemy on level axis
102 -- Levels description:
103 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
104 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
105 -- Level 1: First level of clustering
106 -- Level N: Nth level of clustering
107 data PhyloLevel =
108 PhyloLevel { _phylo_levelId :: PhyloLevelId
109 , _phylo_levelGroups :: [PhyloGroup]
110 }
111 deriving (Generic, Show)
112
113
114 -- | PhyloGroup : group of ngrams at each level and step
115 -- Label : maybe has a label as text
116 -- Ngrams: set of terms that build the group
117 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
118 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
119 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
120 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
121 data PhyloGroup =
122 PhyloGroup { _phylo_groupId :: PhyloGroupId
123 , _phylo_groupLabel :: Text
124 , _phylo_groupNgrams :: [Int]
125 , _phylo_groupQuality :: Map Text Double
126 , _phylo_groupCooc :: Map (Int, Int) Double
127
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
130
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
133 }
134 deriving (Generic, Show, Eq)
135
136 data PhyloBranch =
137 PhyloBranch { _phylo_branchId :: (Int,Int)
138 , _phylo_branchLabel :: Text
139 , _phylo_branchGroups :: [PhyloGroupId]
140 }
141 deriving (Generic, Show)
142
143
144 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
145 type Level = Int
146 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
147 type Index = Int
148
149
150 type PhyloPeriodId = (Start, End)
151 type PhyloLevelId = (PhyloPeriodId, Level)
152 type PhyloGroupId = (PhyloLevelId, Index)
153 type PhyloBranchId = (Level, Index)
154
155
156 -- | Weight : A generic mesure that can be associated with an Id
157 type Weight = Double
158 -- | Pointer : A weighted linked with a given PhyloGroup
159 type Pointer = (PhyloGroupId, Weight)
160
161
162 -- | Ngrams : a contiguous sequence of n terms
163 type Ngrams = Text
164 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
165 type PhyloNgrams = Vector Ngrams
166
167
168 -- | Clique : Set of ngrams cooccurring in the same Document
169 type Clique = Set Ngrams
170 -- | Support : Number of Documents where a Clique occurs
171 type Support = Int
172 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
173 type Fis = (Clique,Support)
174
175
176 -- | Document : a piece of Text linked to a Date
177 data Document = Document
178 { date :: Date
179 , text :: Text
180 } deriving (Show)
181
182
183 type Cluster = [PhyloGroup]
184
185
186 class AppendToPhylo a where
187 addPhyloLevel :: Level -> Map (Date,Date) [a] -> Phylo -> Phylo
188 initPhyloGroup :: a -> PhyloGroup
189
190 -- | A List of PhyloGroup in a PhyloGraph
191 type PhyloNodes = [PhyloGroup]
192 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
193 type PhyloEdges = [(((PhyloGroup,PhyloGroup)),Weight)]
194 -- | The association as a Graph between a list of Nodes and a list of Edges
195 type PhyloGraph = (PhyloNodes,PhyloEdges)
196
197
198 data PhyloError = LevelDoesNotExist
199 | LevelUnassigned
200 deriving (Show)
201
202
203 -- | A List of Proximity mesures or strategies
204 data Proximity = WeightedLogJaccard | Hamming | FromPairs
205 -- | A List of Clustering methods
206 data Clustering = Louvain | RelatedComponents
207
208
209 data PairTo = Childs | Parents
210
211 -- | Lenses
212 makeLenses ''Phylo
213 makeLenses ''PhyloParam
214 makeLenses ''PhyloExport
215 makeLenses ''Software
216 makeLenses ''PhyloGroup
217 makeLenses ''PhyloLevel
218 makeLenses ''PhyloPeriod
219 makeLenses ''PhyloBranch
220
221 -- | JSON instances
222 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
223 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
224 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
225 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
226 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
227 --
228 $(deriveJSON (unPrefix "_software_" ) ''Software )
229 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
230 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
231
232 -- | TODO XML instances
233