]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29
30 module Gargantext.Viz.Phylo where
31
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
36 import Data.Set (Set)
37 import Data.Map (Map)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
44
45 ------------------------------------------------------------------------
46 data PhyloExport =
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
49 } deriving (Generic)
50
51 -- | .phylo parameters
52 data PhyloParam =
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
56 } deriving (Generic)
57
58 type Hash = Text
59
60 -- | Software
61 -- TODO move somewhere since it is generic
62 data Software =
63 Software { _software_name :: Text
64 , _software_version :: Text
65 } deriving (Generic)
66
67 ------------------------------------------------------------------------
68
69 -- | Phylo datatype descriptor of a phylomemy
70 -- Duration : time Segment of the whole phylomemy (start,end)
71 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
72 -- Steps : list of all steps to build the phylomemy
73 data Phylo =
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_ngrams :: PhyloNgrams
76 , _phylo_periods :: [PhyloPeriod]
77 , _phylo_branches :: [PhyloBranch]
78 }
79 deriving (Generic, Show)
80
81
82 -- | Date : a simple Integer
83 type Date = Int
84
85 -- | UTCTime in seconds since UNIX epoch
86 -- type Start = POSIXTime
87 -- type End = POSIXTime
88 type Start = Date
89 type End = Date
90
91 -- | PhyloStep : steps of phylomemy on temporal axis
92 -- Period: tuple (start date, end date) of the step of the phylomemy
93 -- Levels: levels of granularity
94 data PhyloPeriod =
95 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
96 , _phylo_periodLevels :: [PhyloLevel]
97 }
98 deriving (Generic, Show)
99
100
101 -- | PhyloLevel : levels of phylomemy on level axis
102 -- Levels description:
103 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
104 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
105 -- Level 1: First level of clustering
106 -- Level N: Nth level of clustering
107 data PhyloLevel =
108 PhyloLevel { _phylo_levelId :: PhyloLevelId
109 , _phylo_levelGroups :: [PhyloGroup]
110 }
111 deriving (Generic, Show)
112
113
114 -- | PhyloGroup : group of ngrams at each level and step
115 -- Label : maybe has a label as text
116 -- Ngrams: set of terms that build the group
117 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
118 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
119 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
120 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
121 data PhyloGroup =
122 PhyloGroup { _phylo_groupId :: PhyloGroupId
123 , _phylo_groupLabel :: Text
124 , _phylo_groupNgrams :: [Int]
125 , _phylo_groupQuality :: Map Text Double
126 , _phylo_groupCooc :: Map (Int, Int) Double
127
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
130
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
133 }
134 deriving (Generic, Show, Eq, Ord)
135
136 data PhyloBranch =
137 PhyloBranch { _phylo_branchId :: (Level,Int)
138 , _phylo_branchLabel :: Text
139 , _phylo_branchGroups :: [PhyloGroupId]
140 }
141 deriving (Generic, Show)
142
143
144 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
145 type Level = Int
146 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
147 type Index = Int
148
149
150 type PhyloPeriodId = (Start, End)
151 type PhyloLevelId = (PhyloPeriodId, Level)
152 type PhyloGroupId = (PhyloLevelId, Index)
153 type PhyloBranchId = (Level, Index)
154
155
156 -- | Weight : A generic mesure that can be associated with an Id
157 type Weight = Double
158 -- | Pointer : A weighted linked with a given PhyloGroup
159 type Pointer = (PhyloGroupId, Weight)
160
161
162 -- | Ngrams : a contiguous sequence of n terms
163 type Ngrams = Text
164 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
165 type PhyloNgrams = Vector Ngrams
166
167
168 -- | Clique : Set of ngrams cooccurring in the same Document
169 type Clique = Set Ngrams
170 -- | Support : Number of Documents where a Clique occurs
171 type Support = Int
172 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
173 type Fis = (Clique,Support)
174
175
176 -- | Document : a piece of Text linked to a Date
177 data Document = Document
178 { date :: Date
179 , text :: Text
180 } deriving (Show)
181
182
183 type Cluster = [PhyloGroup]
184
185
186 -- | A List of PhyloGroup in a PhyloGraph
187 type PhyloNodes = [PhyloGroup]
188 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
189 type PhyloEdges = [((PhyloGroup,PhyloGroup),Weight)]
190 -- | The association as a Graph between a list of Nodes and a list of Edges
191 type PhyloGraph = (PhyloNodes,PhyloEdges)
192
193
194 data PhyloError = LevelDoesNotExist
195 | LevelUnassigned
196 deriving (Show)
197
198
199 -- | A List of Proximity mesures or strategies
200 data Proximity = WeightedLogJaccard | Hamming | FromPairs
201 -- | A List of Clustering methods
202 data Clustering = Louvain | RelatedComponents
203
204
205 data PairTo = Childs | Parents
206
207 -- | Lenses
208 makeLenses ''Phylo
209 makeLenses ''PhyloParam
210 makeLenses ''PhyloExport
211 makeLenses ''Software
212 makeLenses ''PhyloGroup
213 makeLenses ''PhyloLevel
214 makeLenses ''PhyloPeriod
215 makeLenses ''PhyloBranch
216
217 -- | JSON instances
218 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
219 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
220 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
221 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
222 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
223 --
224 $(deriveJSON (unPrefix "_software_" ) ''Software )
225 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
226 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
227
228 -- | TODO XML instances
229