]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Types/Phylo.hs
Partial support for bidirectional PhyloData parsing
[gargantext.git] / src / Gargantext / Core / Types / Phylo.hs
1 {-|
2 Module : Gargantext.Types.Phylo
3 Description : Main Types for Phylomemy
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 Phylomemy was first described in Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns in science evolution—the rise and fall of scientific fields. PloS one 8, e54847.
19 .
20 -}
21
22 {-# LANGUAGE DerivingStrategies #-}
23 {-# LANGUAGE LambdaCase #-}
24 {-# LANGUAGE TemplateHaskell #-}
25 {-# LANGUAGE TypeApplications #-}
26
27 module Gargantext.Core.Types.Phylo where
28
29 import Control.Monad.Fail (fail)
30 import Control.Lens (makeLenses)
31
32 import Data.Aeson
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Monoid
35 import Data.Swagger
36 import Data.Text (Text)
37 import Data.Time.Clock.POSIX (POSIXTime)
38 import qualified Data.Text as T
39 import Test.QuickCheck
40 import Test.QuickCheck.Instances.Text()
41
42 import GHC.Generics (Generic)
43
44 import Gargantext.Prelude
45 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
46
47 ------------------------------------------------------------------------
48 -- | Phylo datatype descriptor of a phylomemy
49 -- Duration : time Segment of the whole phylomemy in UTCTime format (start,end)
50 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
51 -- Steps : list of all steps to build the phylomemy
52 data Phylo = Phylo { _phylo_Duration :: (Start, End)
53 , _phylo_Ngrams :: [Ngram]
54 , _phylo_Periods :: [PhyloPeriod]
55 } deriving (Generic)
56
57 -- | UTCTime in seconds since UNIX epoch
58 type Start = POSIXTime
59 type End = POSIXTime
60
61 -- | Indexed Ngram
62 type Ngram = (NgramId, Text)
63 type NgramId = Int
64
65 -- | PhyloStep : steps of phylomemy on temporal axis
66 -- Period: tuple (start date, end date) of the step of the phylomemy
67 -- Levels: levels of granularity
68 data PhyloPeriod = PhyloPeriod { _phylo_PeriodId :: PhyloPeriodId
69 , _phylo_PeriodLevels :: [PhyloLevel]
70 } deriving (Generic)
71
72 type PhyloPeriodId = (Start, End)
73
74 -- | PhyloLevel : levels of phylomemy on level axis
75 -- Levels description:
76 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
77 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
78 -- Level 1: First level of clustering
79 -- Level N: Nth level of clustering
80 data PhyloLevel = PhyloLevel { _phylo_LevelId :: PhyloLevelId
81 , _phylo_LevelGroups :: [PhyloGroup]
82 } deriving (Generic)
83
84 type PhyloLevelId = (PhyloPeriodId, Int)
85
86 -- | PhyloGroup : group of ngrams at each level and step
87 -- Label : maybe has a label as text
88 -- Ngrams: set of terms that build the group
89 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
90 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
91 data PhyloGroup = PhyloGroup { _phylo_GroupId :: PhyloGroupId
92 , _phylo_GroupLabel :: Maybe Text
93 , _phylo_GroupNgrams :: [NgramId]
94
95 , _phylo_GroupPeriodParents :: [Edge]
96 , _phylo_GroupPeriodChilds :: [Edge]
97
98 , _phylo_GroupLevelParents :: [Edge]
99 , _phylo_GroupLevelChilds :: [Edge]
100 } deriving (Generic)
101
102 type PhyloGroupId = (PhyloLevelId, Int)
103 type Edge = (PhyloGroupId, Weight)
104 type Weight = Double
105
106 ------------------------------------------------------------------------
107 -- | Phylo 'GraphData' datatype descriptor. It must be isomorphic to
108 -- the 'GraphData' type of the purecript frontend.
109
110 data GraphData =
111 GraphData {
112 _gd__subgraph_cnt :: Int
113 , _gd_directed :: Bool
114 , _gd_edges :: [EdgeData]
115 , _gd_objects :: [ObjectData]
116 , _gd_strict :: Bool
117 } deriving (Show, Eq, Generic)
118
119 -- temp placeholder.
120 newtype ObjectData = ObjectData { _ObjectData :: Value }
121 deriving stock (Show, Eq, Generic)
122 deriving newtype (FromJSON, ToJSON)
123
124 data EdgeCommonData =
125 EdgeCommonData {
126 _ed_color :: !Text
127 , _ed_head :: !Int
128 , _ed_pos :: !Text
129 , _ed_tail :: !Int
130 , _ed_width :: !Text
131 } deriving (Show, Eq, Generic)
132
133 newtype GvId = GvId { _GvId :: Int }
134 deriving (Show, Eq, Generic)
135
136 data EdgeData
137 = GroupToAncestor !GvId !EdgeCommonData !GroupToAncestorData
138 | GroupToGroup !GvId !EdgeCommonData !GroupToGroupData
139 | BranchToGroup !GvId !EdgeCommonData !BranchToGroupData
140 deriving (Show, Eq, Generic)
141
142 data GroupToAncestorData
143 = GroupToAncestorData
144 { _gta_arrowhead :: !Text
145 , _gta_lbl :: !Text
146 , _gta_penwidth :: !Text
147 , _gta_style :: !Text
148 } deriving (Show, Eq, Generic)
149
150 data GroupToGroupData
151 = GroupToGroupData
152 { _gtg_constraint :: !Text
153 , _gtg_lbl :: !Text
154 , _gtg_penwidth :: !Text
155 } deriving (Show, Eq, Generic)
156
157 data BranchToGroupData
158 = BranchToGroupData
159 { _btg_arrowhead :: !Text
160 , _btg_style :: Maybe Text
161 } deriving (Show, Eq, Generic)
162
163 -- | Lenses
164 makeLenses ''Phylo
165 makeLenses ''PhyloPeriod
166 makeLenses ''PhyloLevel
167 makeLenses ''PhyloGroup
168
169 -- | JSON instances
170 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
171 $(deriveJSON (unPrefix "_phylo_Period" ) ''PhyloPeriod )
172 $(deriveJSON (unPrefix "_phylo_Level" ) ''PhyloLevel )
173 $(deriveJSON (unPrefix "_phylo_Group" ) ''PhyloGroup )
174
175 instance ToJSON GraphData where
176 toJSON GraphData{..} = object
177 [ "_subgraph_cnt" .= _gd__subgraph_cnt
178 , "directed" .= _gd_directed
179 , "edges" .= _gd_edges
180 , "objects" .= _gd_objects
181 , "strict" .= _gd_strict
182 ]
183
184 instance FromJSON GraphData where
185 parseJSON = withObject "GraphData" $ \o -> do
186 _gd__subgraph_cnt <- o .: "_subgraph_cnt"
187 _gd_directed <- o .: "directed"
188 _gd_edges <- o .: "edges"
189 _gd_objects <- o .: "objects"
190 _gd_strict <- o .: "strict"
191 pure GraphData{..}
192
193 instance ToJSON GvId where
194 toJSON GvId{..} = toJSON _GvId
195 instance FromJSON GvId where
196 parseJSON v = GvId <$> parseJSON v
197
198 instance ToJSON EdgeData where
199 toJSON = \case
200 GroupToAncestor gvid commonData edgeTypeData
201 -> mkNode "ancestorLink" gvid commonData edgeTypeData
202 GroupToGroup gvid commonData edgeTypeData
203 -> mkNode "link" gvid commonData edgeTypeData
204 BranchToGroup gvid commonData edgeTypeData
205 -> mkNode "branchLink" gvid commonData edgeTypeData
206
207 mkNode :: ToJSON a => Text -> GvId -> EdgeCommonData -> a -> Value
208 mkNode edgeType gvid commonData edgeTypeData =
209 let commonDataJSON = toJSON commonData
210 edgeTypeDataJSON = toJSON edgeTypeData
211 header = object $ [ "edgeType" .= toJSON edgeType
212 , "_gvid" .= toJSON gvid
213 ]
214 in case (commonDataJSON, edgeTypeDataJSON, header) of
215 (Object hdr, Object cdJSON, Object etDataJSON)
216 -> Object $ hdr <> cdJSON <> etDataJSON
217 _ -> panic "[Gargantext.Core.Types.Phylo.mkNode] impossible: commonData, header or edgeTypeDataJSON didn't convert back to JSON Object."
218
219
220 instance FromJSON EdgeData where
221 parseJSON = withObject "EdgeData" $ \o -> do
222 edgeType <- o .: "edgeType"
223 gvid <- o .: "_gvid"
224 _ed_color <- o .: "color"
225 _ed_head <- o .: "head"
226 _ed_pos <- o .: "pos"
227 _ed_tail <- o .: "tail"
228 _ed_width <- o .: "width"
229 case (edgeType :: Text) of
230 "ancestorLink" -> GroupToAncestor <$> pure gvid <*> pure EdgeCommonData{..} <*> parseJSON (Object o)
231 "link" -> GroupToGroup <$> pure gvid <*> pure EdgeCommonData{..} <*> parseJSON (Object o)
232 "branchLink" -> BranchToGroup <$> pure gvid <*> pure EdgeCommonData{..} <*> parseJSON (Object o)
233 _ -> fail $ "EdgeData: unrecognised edgeType for Phylo graph: " <> T.unpack edgeType
234
235 instance ToJSON EdgeCommonData where
236 toJSON EdgeCommonData{..} = object
237 [ "color" .= _ed_color
238 , "head" .= _ed_head
239 , "pos" .= _ed_pos
240 , "tail" .= _ed_tail
241 , "width" .= _ed_width
242 ]
243
244 instance ToJSON GroupToAncestorData where
245 toJSON GroupToAncestorData{..} =
246 object [ "arrowhead" .= _gta_arrowhead
247 , "lbl" .= _gta_lbl
248 , "penwidth" .= _gta_penwidth
249 , "style" .= _gta_style
250 ]
251
252 instance FromJSON GroupToAncestorData where
253 parseJSON = withObject "GroupToAncestorData" $ \o -> do
254 _gta_arrowhead <- o .: "arrowhead"
255 _gta_lbl <- o .: "lbl"
256 _gta_penwidth <- o .: "penwidth"
257 _gta_style <- o .: "style"
258 pure GroupToAncestorData{..}
259
260 instance ToJSON GroupToGroupData where
261 toJSON GroupToGroupData{..} =
262 object [ "constraint" .= _gtg_constraint
263 , "lbl" .= _gtg_lbl
264 , "penwidth" .= _gtg_penwidth
265 ]
266
267 instance FromJSON GroupToGroupData where
268 parseJSON = withObject "BranchToGroupData" $ \o -> do
269 _gtg_constraint <- o .: "constraint"
270 _gtg_lbl <- o .: "lbl"
271 _gtg_penwidth <- o .: "penwidth"
272 pure GroupToGroupData{..}
273
274 instance ToJSON BranchToGroupData where
275 toJSON BranchToGroupData{..} =
276 object [ "arrowhead" .= _btg_arrowhead
277 , "style" .= _btg_style
278 ]
279
280 instance FromJSON BranchToGroupData where
281 parseJSON = withObject "BranchToGroupData" $ \o -> do
282 _btg_arrowhead <- o .: "arrowhead"
283 _btg_style <- o .:? "style"
284 pure BranchToGroupData{..}
285
286
287 -- | ToSchema instances
288 instance ToSchema Phylo where
289 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
290 instance ToSchema PhyloPeriod where
291 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_Period")
292 instance ToSchema PhyloLevel where
293 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_Level")
294 instance ToSchema PhyloGroup where
295 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_Group")
296 instance ToSchema BranchToGroupData where
297 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_btg_")
298 instance ToSchema GroupToGroupData where
299 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_gtg_")
300 instance ToSchema GroupToAncestorData where
301 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_gta_")
302 instance ToSchema EdgeCommonData where
303 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ed_")
304 instance ToSchema ObjectData where
305 declareNamedSchema _ = pure $ NamedSchema (Just "ObjectData") $ mempty
306 instance ToSchema GvId where
307 declareNamedSchema = genericDeclareNamedSchema defaultSchemaOptions
308 instance ToSchema EdgeData where
309 declareNamedSchema = genericDeclareNamedSchema defaultSchemaOptions
310 instance ToSchema GraphData where
311 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_gd_")
312
313 -- | Arbitrary instances
314 instance Arbitrary BranchToGroupData where
315 arbitrary = BranchToGroupData <$> arbitrary <*> arbitrary
316 instance Arbitrary GroupToGroupData where
317 arbitrary = GroupToGroupData <$> arbitrary
318 <*> arbitrary
319 <*> arbitrary
320 instance Arbitrary GroupToAncestorData where
321 arbitrary = GroupToAncestorData <$> arbitrary
322 <*> arbitrary
323 <*> arbitrary
324 <*> arbitrary
325 instance Arbitrary EdgeCommonData where
326 arbitrary = EdgeCommonData <$> arbitrary
327 <*> arbitrary
328 <*> arbitrary
329 <*> arbitrary
330 <*> arbitrary
331 instance Arbitrary ObjectData where
332 arbitrary = ObjectData <$> (String <$> arbitrary) -- temporary, it doesn't matter.
333 instance Arbitrary GvId where
334 arbitrary = GvId <$> arbitrary
335 instance Arbitrary EdgeData where
336 arbitrary = oneof [ GroupToAncestor <$> arbitrary <*> arbitrary <*> arbitrary
337 , GroupToGroup <$> arbitrary <*> arbitrary <*> arbitrary
338 , BranchToGroup <$> arbitrary <*> arbitrary <*> arbitrary
339 ]
340 instance Arbitrary GraphData where
341 arbitrary = GraphData <$> arbitrary
342 <*> arbitrary
343 <*> arbitrary
344 <*> arbitrary
345 <*> arbitrary