]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
Add the clustering up to level 2 and more
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28
29 module Gargantext.Viz.Phylo where
30
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Maybe (Maybe)
34 import Data.Text (Text)
35 import Data.Set (Set)
36 import Data.Map (Map)
37 import Data.Vector (Vector)
38 import Data.Time.Clock.POSIX (POSIXTime)
39 import GHC.Generics (Generic)
40 import Gargantext.Database.Schema.Ngrams (NgramsId)
41 import Gargantext.Core.Utils.Prefix (unPrefix)
42 import Gargantext.Prelude
43
44 ------------------------------------------------------------------------
45 data PhyloExport =
46 PhyloExport { _phyloExport_param :: PhyloParam
47 , _phyloExport_data :: Phylo
48 } deriving (Generic)
49
50 -- | .phylo parameters
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_params :: Hash
55 } deriving (Generic)
56
57 type Hash = Text
58
59 -- | Software
60 -- TODO move somewhere since it is generic
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic)
65
66 ------------------------------------------------------------------------
67 -- | Phylo datatype descriptor of a phylomemy
68 -- Duration : time Segment of the whole phylomemy (start,end)
69 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
70 -- Steps : list of all steps to build the phylomemy
71 data Phylo =
72 Phylo { _phylo_duration :: (Start, End)
73 , _phylo_ngrams :: PhyloNgrams
74 , _phylo_periods :: [PhyloPeriod]
75 , _phylo_branches :: [PhyloBranch]
76 }
77 deriving (Generic, Show)
78
79
80 -- | Date : a simple Integer
81 type Date = Int
82
83 -- | UTCTime in seconds since UNIX epoch
84 -- type Start = POSIXTime
85 -- type End = POSIXTime
86 type Start = Date
87 type End = Date
88
89 -- | PhyloStep : steps of phylomemy on temporal axis
90 -- Period: tuple (start date, end date) of the step of the phylomemy
91 -- Levels: levels of granularity
92 data PhyloPeriod =
93 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
94 , _phylo_periodLevels :: [PhyloLevel]
95 }
96 deriving (Generic, Show)
97
98
99 -- | PhyloLevel : levels of phylomemy on level axis
100 -- Levels description:
101 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
102 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
103 -- Level 1: First level of clustering
104 -- Level N: Nth level of clustering
105 data PhyloLevel =
106 PhyloLevel { _phylo_levelId :: PhyloLevelId
107 , _phylo_levelGroups :: [PhyloGroup]
108 }
109 deriving (Generic, Show)
110
111
112 -- | PhyloGroup : group of ngrams at each level and step
113 -- Label : maybe has a label as text
114 -- Ngrams: set of terms that build the group
115 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
116 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
117 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
118 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
119 data PhyloGroup =
120 PhyloGroup { _phylo_groupId :: PhyloGroupId
121 , _phylo_groupLabel :: Text
122 , _phylo_groupNgrams :: [Int]
123 , _phylo_groupQuality :: Map Text Double
124 , _phylo_groupCooc :: Map (Int, Int) Double
125
126 , _phylo_groupPeriodParents :: [Pointer]
127 , _phylo_groupPeriodChilds :: [Pointer]
128
129 , _phylo_groupLevelParents :: [Pointer]
130 , _phylo_groupLevelChilds :: [Pointer]
131 }
132 deriving (Generic, Show, Eq)
133
134 data PhyloBranch =
135 PhyloBranch { _phylo_branchId :: (Int,Int)
136 , _phylo_branchLabel :: Text
137 , _phylo_branchGroups :: [PhyloGroupId]
138 }
139 deriving (Generic, Show)
140
141 type PhyloPeriodId = (Start, End)
142 type PhyloLevelId = (PhyloPeriodId, Int)
143 type PhyloGroupId = (PhyloLevelId, Int)
144
145 type Pointer = (PhyloGroupId, Weight)
146 type Weight = Double
147
148 type PhyloBranchId = (Int, Int)
149
150
151 -- | Ngrams : a contiguous sequence of n terms
152 type Ngrams = Text
153 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
154 type PhyloNgrams = Vector Ngrams
155
156
157 -- | Clique : Set of ngrams cooccurring in the same Document
158 type Clique = Set Ngrams
159 -- | Support : Number of Documents where a Clique occurs
160 type Support = Int
161 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
162 type Fis = Map Clique Support
163
164
165 data Direction = From | To
166 deriving (Show, Eq)
167
168 data LevelLabel = Level_m1 | Level_0 | Level_1 | Level_mN | Level_N | Level_pN
169 deriving (Show, Eq, Enum, Bounded)
170
171 data Level =
172 Level { _levelLabel :: LevelLabel
173 , _levelValue :: Int
174 } deriving (Show, Eq)
175
176 data LevelLink =
177 LevelLink { _levelFrom :: Level
178 , _levelTo :: Level
179 } deriving (Show)
180
181 -- | Document : a piece of Text linked to a Date
182 data Document = Document
183 { date :: Date
184 , text :: Text
185 } deriving (Show)
186
187 data PhyloError = LevelDoesNotExist
188 | LevelUnassigned
189 deriving (Show)
190
191
192 type PhyloGraph = (PhyloNodes,PhyloEdges)
193 type PhyloNodes = [PhyloGroup]
194 type PhyloEdges = [(((PhyloGroup,PhyloGroup)),Double)]
195
196
197 data Proximity = WeightedLogJaccard | Hamming | FromPairs
198
199 data Clustering = Louvain | RelatedComponents
200
201
202 data PairTo = Childs | Parents
203
204 -- | Lenses
205 makeLenses ''Phylo
206 makeLenses ''PhyloParam
207 makeLenses ''PhyloExport
208 makeLenses ''Software
209 makeLenses ''PhyloGroup
210 makeLenses ''PhyloLevel
211 makeLenses ''PhyloPeriod
212 makeLenses ''Level
213 makeLenses ''LevelLink
214 makeLenses ''PhyloBranch
215
216 -- | JSON instances
217 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
218 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
219 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
220 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
221 $(deriveJSON (unPrefix "_phylo_branch" ) ''PhyloBranch )
222 --
223 $(deriveJSON (unPrefix "_software_" ) ''Software )
224 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
225 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
226
227 -- | TODO XML instances
228