]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/API.hs
[TOOLING] sql queries to remove unused documents and ngrams from database. Usage...
[gargantext.git] / src / Gargantext / Core / Viz / Graph / API.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 {-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.Core.Viz.Graph.API
17 where
18
19 import Control.Lens (set, (^.), _Just, (^?), at)
20 import Data.Aeson
21 import Data.Maybe (fromMaybe)
22 import Data.Swagger
23 import Data.Text hiding (head)
24 import Debug.Trace (trace)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Admin.Orchestrator.Types
27 import Gargantext.API.Ngrams.Tools
28 import Gargantext.API.Prelude
29 import Gargantext.Core.Methods.Distances (Distance(..), GraphMetric(..), withMetric)
30 import Gargantext.Core.NodeStory
31 import Gargantext.Core.Types.Main
32 import Gargantext.Core.Viz.Graph
33 import Gargantext.Core.Viz.Graph.GEXF ()
34 import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
35 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
36 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
37 import Gargantext.Database.Action.Node (mkNodeWithParent)
38 import Gargantext.Database.Admin.Config
39 import Gargantext.Database.Admin.Types.Node
40 import Gargantext.Database.Prelude (Cmd)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
43 import Gargantext.Database.Query.Table.Node.Select
44 import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
45 import Gargantext.Database.Query.Table.Node.User (getNodeUser)
46 import Gargantext.Database.Schema.Node
47 import Gargantext.Database.Schema.Ngrams
48 import Gargantext.Prelude
49 import Servant
50 import Servant.Job.Async
51 import Servant.XML
52 import qualified Data.HashMap.Strict as HashMap
53
54 ------------------------------------------------------------------------
55 -- | There is no Delete specific API for Graph since it can be deleted
56 -- as simple Node.
57 type GraphAPI = Get '[JSON] HyperdataGraphAPI
58 :<|> "async" :> GraphAsyncAPI
59 :<|> "clone"
60 :> ReqBody '[JSON] HyperdataGraphAPI
61 :> Post '[JSON] NodeId
62 :<|> "gexf" :> Get '[XML] (Headers '[Servant.Header "Content-Disposition" Text] Graph)
63 :<|> "versions" :> GraphVersionsAPI
64
65 data GraphVersions =
66 GraphVersions { gv_graph :: Maybe Int
67 , gv_repo :: Int
68 }
69 deriving (Show, Generic)
70
71 instance ToJSON GraphVersions
72 instance ToSchema GraphVersions
73
74 graphAPI :: UserId -> NodeId -> GargServer GraphAPI
75 graphAPI u n = getGraph u n
76 :<|> graphAsync u n
77 :<|> graphClone u n
78 :<|> getGraphGexf u n
79 :<|> graphVersionsAPI u n
80
81 ------------------------------------------------------------------------
82 --getGraph :: UserId -> NodeId -> GargServer HyperdataGraphAPI
83 getGraph :: FlowCmdM env err m
84 => UserId
85 -> NodeId
86 -> m HyperdataGraphAPI
87 getGraph _uId nId = do
88 nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
89
90 let
91 graph = nodeGraph ^. node_hyperdata . hyperdataGraph
92 camera = nodeGraph ^. node_hyperdata . hyperdataCamera
93 cId = maybe (panic "[G.V.G.API] Node has no parent")
94 identity
95 $ nodeGraph ^. node_parent_id
96
97 listId <- defaultList cId
98 repo <- getRepo' [listId]
99
100 -- TODO Distance in Graph params
101 case graph of
102 Nothing -> do
103 let defaultMetric = Order1
104 graph' <- computeGraph cId (withMetric defaultMetric) NgramsTerms repo
105 mt <- defaultGraphMetadata cId "Title" repo defaultMetric
106 let
107 graph'' = set graph_metadata (Just mt) graph'
108 hg = HyperdataGraphAPI graph'' camera
109 -- _ <- updateHyperdata nId hg
110 _ <- updateHyperdata nId (HyperdataGraph (Just graph'') camera)
111 pure $ trace "[G.V.G.API] Graph empty, computing" hg
112
113 Just graph' -> pure $ trace "[G.V.G.API] Graph exists, returning" $
114 HyperdataGraphAPI graph' camera
115
116
117 --recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
118 recomputeGraph :: FlowCmdM env err m
119 => UserId
120 -> NodeId
121 -> Maybe GraphMetric
122 -> m Graph
123 recomputeGraph _uId nId maybeDistance = do
124 nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
125 let
126 graph = nodeGraph ^. node_hyperdata . hyperdataGraph
127 camera = nodeGraph ^. node_hyperdata . hyperdataCamera
128 graphMetadata = graph ^? _Just . graph_metadata . _Just
129 listVersion = graph ^? _Just . graph_metadata . _Just . gm_list . lfg_version
130 graphMetric = case maybeDistance of
131 Nothing -> graph ^? _Just . graph_metadata . _Just . gm_metric
132 _ -> maybeDistance
133
134 let
135 cId = maybe (panic "[G.C.V.G.API.recomputeGraph] Node has no parent")
136 identity
137 $ nodeGraph ^. node_parent_id
138 similarity = case graphMetric of
139 Nothing -> withMetric Order1
140 Just m -> withMetric m
141
142 listId <- defaultList cId
143 repo <- getRepo' [listId]
144 let v = repo ^. unNodeStory . at listId . _Just . a_version
145
146 case graph of
147 Nothing -> do
148 graph' <- computeGraph cId similarity NgramsTerms repo
149 mt <- defaultGraphMetadata cId "Title" repo (fromMaybe Order1 maybeDistance)
150 let graph'' = set graph_metadata (Just mt) graph'
151 _ <- updateHyperdata nId (HyperdataGraph (Just graph'') camera)
152 pure $ trace "[G.V.G.API.recomputeGraph] Graph empty, computed" graph''
153
154 Just graph' -> if listVersion == Just v
155 then pure graph'
156 else do
157 graph'' <- computeGraph cId similarity NgramsTerms repo
158 let graph''' = set graph_metadata graphMetadata graph''
159 _ <- updateHyperdata nId (HyperdataGraph (Just graph''') camera)
160 pure $ trace "[G.V.G.API] Graph exists, recomputing" graph'''
161
162
163 -- TODO use Database Monad only here ?
164 --computeGraph :: HasNodeError err
165 -- => CorpusId
166 -- -> Distance
167 -- -> NgramsType
168 -- -> NodeListStory
169 -- -> Cmd err Graph
170 computeGraph :: FlowCmdM env err m
171 => CorpusId
172 -> Distance
173 -> NgramsType
174 -> NodeListStory
175 -> m Graph
176 computeGraph cId d nt repo = do
177 lId <- defaultList cId
178 lIds <- selectNodesWithUsername NodeList userMaster
179
180 let ngs = filterListWithRoot MapTerm
181 $ mapTermListRoot [lId] nt repo
182
183 myCooc <- HashMap.filter (>2) -- Removing the hapax (ngrams with 1 cooc)
184 <$> getCoocByNgrams (Diagonal True)
185 <$> groupNodesByNgrams ngs
186 <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
187
188 -- printDebug "myCooc" myCooc
189 -- saveAsFileDebug "debug/my-cooc" myCooc
190
191 listNgrams <- getListNgrams [lId] nt
192
193 graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
194 -- saveAsFileDebug "debug/graph" graph
195 pure $ mergeGraphNgrams graph (Just listNgrams)
196
197
198 defaultGraphMetadata :: HasNodeError err
199 => CorpusId
200 -> Text
201 -> NodeListStory
202 -> GraphMetric
203 -> Cmd err GraphMetadata
204 defaultGraphMetadata cId t repo gm = do
205 lId <- defaultList cId
206
207 pure $ GraphMetadata {
208 _gm_title = t
209 , _gm_metric = gm
210 , _gm_corpusId = [cId]
211 , _gm_legend = [
212 LegendField 1 "#FFF" "Cluster1"
213 , LegendField 2 "#FFF" "Cluster2"
214 , LegendField 3 "#FFF" "Cluster3"
215 , LegendField 4 "#FFF" "Cluster4"
216 ]
217 , _gm_list = (ListForGraph lId (repo ^. unNodeStory . at lId . _Just . a_version))
218 , _gm_startForceAtlas = True
219 }
220 -- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
221
222
223 ------------------------------------------------------------
224 type GraphAsyncAPI = Summary "Recompute graph"
225 :> "recompute"
226 :> AsyncJobsAPI JobLog () JobLog
227
228
229 graphAsync :: UserId -> NodeId -> GargServer GraphAsyncAPI
230 graphAsync u n =
231 serveJobsAPI $
232 JobFunction (\_ log' -> graphRecompute u n (liftBase . log'))
233
234
235 --graphRecompute :: UserId
236 -- -> NodeId
237 -- -> (JobLog -> GargNoServer ())
238 -- -> GargNoServer JobLog
239 graphRecompute :: FlowCmdM env err m
240 => UserId
241 -> NodeId
242 -> (JobLog -> m ())
243 -> m JobLog
244 graphRecompute u n logStatus = do
245 logStatus JobLog { _scst_succeeded = Just 0
246 , _scst_failed = Just 0
247 , _scst_remaining = Just 1
248 , _scst_events = Just []
249 }
250 _g <- trace (show u) $ recomputeGraph u n Nothing
251 pure JobLog { _scst_succeeded = Just 1
252 , _scst_failed = Just 0
253 , _scst_remaining = Just 0
254 , _scst_events = Just []
255 }
256
257 ------------------------------------------------------------
258 type GraphVersionsAPI = Summary "Graph versions"
259 :> Get '[JSON] GraphVersions
260 :<|> Summary "Recompute graph version"
261 :> Post '[JSON] Graph
262
263 graphVersionsAPI :: UserId -> NodeId -> GargServer GraphVersionsAPI
264 graphVersionsAPI u n =
265 graphVersions 0 n
266 :<|> recomputeVersions u n
267
268 graphVersions :: Int -> NodeId -> GargNoServer GraphVersions
269 graphVersions n nId = do
270 nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
271 let
272 graph = nodeGraph
273 ^. node_hyperdata
274 . hyperdataGraph
275
276 listVersion = graph
277 ^? _Just
278 . graph_metadata
279 . _Just
280 . gm_list
281 . lfg_version
282
283 mcId <- getClosestParentIdByType nId NodeCorpus
284 let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
285
286 maybeListId <- defaultListMaybe cId
287 case maybeListId of
288 Nothing -> if n <= 2
289 then graphVersions (n+1) cId
290 else panic "[G.V.G.API] list not found after iterations"
291
292 Just listId -> do
293 repo <- getRepo' [listId]
294 let v = repo ^. unNodeStory . at listId . _Just . a_version
295 printDebug "graphVersions" v
296
297 pure $ GraphVersions { gv_graph = listVersion
298 , gv_repo = v }
299
300 --recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
301 recomputeVersions :: FlowCmdM env err m
302 => UserId
303 -> NodeId
304 -> m Graph
305 recomputeVersions uId nId = recomputeGraph uId nId Nothing
306
307 ------------------------------------------------------------
308 graphClone :: UserId
309 -> NodeId
310 -> HyperdataGraphAPI
311 -> GargNoServer NodeId
312 graphClone uId pId (HyperdataGraphAPI { _hyperdataAPIGraph = graph
313 , _hyperdataAPICamera = camera }) = do
314 let nodeType = NodeGraph
315 nodeUser <- getNodeUser (NodeId uId)
316 nodeParent <- getNodeWith pId (Proxy :: Proxy HyperdataGraph)
317 let uId' = nodeUser ^. node_user_id
318 nIds <- mkNodeWithParent nodeType (Just pId) uId' $ nodeParent ^. node_name
319 case nIds of
320 [] -> pure pId
321 (nId:_) -> do
322 let graphP = graph
323 let graphP' = set (graph_metadata . _Just . gm_startForceAtlas) False graphP
324
325 _ <- updateHyperdata nId (HyperdataGraph (Just graphP') camera)
326
327 pure nId
328
329 ------------------------------------------------------------
330 --getGraphGexf :: UserId
331 -- -> NodeId
332 -- -> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
333 getGraphGexf :: FlowCmdM env err m
334 => UserId
335 -> NodeId
336 -> m (Headers '[Servant.Header "Content-Disposition" Text] Graph)
337 getGraphGexf uId nId = do
338 HyperdataGraphAPI { _hyperdataAPIGraph = graph } <- getGraph uId nId
339 pure $ addHeader "attachment; filename=graph.gexf" graph
340
341
342
343
344
345