]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/API.hs
[WIP] another way to optimize
[gargantext.git] / src / Gargantext / Core / Viz / Graph / API.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE BangPatterns #-}
13 {-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.Core.Viz.Graph.API
17 where
18
19 import Control.Lens (set, (^.), _Just, (^?), at)
20 import Data.Aeson
21 import Data.Maybe (fromMaybe)
22 import Data.Swagger
23 import Data.Text hiding (head)
24 import Debug.Trace (trace)
25 import GHC.Generics (Generic)
26 import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
27 import Gargantext.API.Admin.Orchestrator.Types
28 import Gargantext.API.Ngrams.Tools
29 import Gargantext.API.Prelude
30 import Gargantext.Core.Methods.Similarities (Similarity(..), GraphMetric(..), withMetric)
31 import Gargantext.Core.NodeStory
32 import Gargantext.Core.Types.Main
33 import Gargantext.Core.Viz.Graph
34 import Gargantext.Core.Viz.Graph.GEXF ()
35 import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
37 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
38 import Gargantext.Database.Action.Node (mkNodeWithParent)
39 import Gargantext.Database.Admin.Config
40 import Gargantext.Database.Admin.Types.Node
41 import Gargantext.Database.Prelude (Cmd)
42 import Gargantext.Database.Query.Table.Node
43 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
44 import Gargantext.Database.Query.Table.Node.Select
45 import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
46 import Gargantext.Database.Query.Table.Node.User (getNodeUser)
47 import Gargantext.Database.Schema.Node
48 import Gargantext.Database.Schema.Ngrams
49 import Gargantext.Prelude
50 import Gargantext.Utils.Jobs (serveJobsAPI)
51 import Servant
52 import Servant.Job.Async (AsyncJobsAPI)
53 import Servant.XML
54 import qualified Data.HashMap.Strict as HashMap
55
56 ------------------------------------------------------------------------
57 -- | There is no Delete specific API for Graph since it can be deleted
58 -- as simple Node.
59 type GraphAPI = Get '[JSON] HyperdataGraphAPI
60 :<|> "async" :> GraphAsyncAPI
61 :<|> "clone"
62 :> ReqBody '[JSON] HyperdataGraphAPI
63 :> Post '[JSON] NodeId
64 :<|> "gexf" :> Get '[XML] (Headers '[Servant.Header "Content-Disposition" Text] Graph)
65 :<|> "versions" :> GraphVersionsAPI
66
67 data GraphVersions =
68 GraphVersions { gv_graph :: Maybe Int
69 , gv_repo :: Int
70 }
71 deriving (Show, Generic)
72
73 instance FromJSON GraphVersions
74 instance ToJSON GraphVersions
75 instance ToSchema GraphVersions
76
77 graphAPI :: UserId -> NodeId -> ServerT GraphAPI (GargM Env GargError)
78 graphAPI u n = getGraph u n
79 :<|> graphAsync u n
80 :<|> graphClone u n
81 :<|> getGraphGexf u n
82 :<|> graphVersionsAPI u n
83
84 ------------------------------------------------------------------------
85 --getGraph :: UserId -> NodeId -> GargServer HyperdataGraphAPI
86 getGraph :: FlowCmdM env err m
87 => UserId
88 -> NodeId
89 -> m HyperdataGraphAPI
90 getGraph _uId nId = do
91 nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
92
93 let
94 graph = nodeGraph ^. node_hyperdata . hyperdataGraph
95 camera = nodeGraph ^. node_hyperdata . hyperdataCamera
96
97 mcId <- getClosestParentIdByType nId NodeCorpus
98 let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
99
100 -- printDebug "[getGraph] getting list for cId" cId
101 listId <- defaultList cId
102 repo <- getRepo [listId]
103
104 -- TODO Similarity in Graph params
105 case graph of
106 Nothing -> do
107 let defaultMetric = Order1
108 let defaultPartitionMethod = Spinglass
109 let defaultEdgesStrength = Strong
110 graph' <- computeGraph cId defaultPartitionMethod (withMetric defaultMetric) defaultEdgesStrength (NgramsTerms, NgramsTerms) repo
111 mt <- defaultGraphMetadata cId "Title" repo defaultMetric defaultEdgesStrength
112 let
113 graph'' = set graph_metadata (Just mt) graph'
114 hg = HyperdataGraphAPI graph'' camera
115 -- _ <- updateHyperdata nId hg
116 _ <- updateHyperdata nId (HyperdataGraph (Just graph'') camera)
117 pure $ trace "[G.V.G.API] Graph empty, computing" hg
118
119 Just graph' -> pure $ trace "[G.V.G.API] Graph exists, returning" $
120 HyperdataGraphAPI graph' camera
121
122
123 --recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
124 recomputeGraph :: FlowCmdM env err m
125 => UserId
126 -> NodeId
127 -> PartitionMethod
128 -> Maybe GraphMetric
129 -> Maybe Strength
130 -> NgramsType
131 -> NgramsType
132 -> Bool
133 -> m Graph
134 recomputeGraph _uId nId method maybeSimilarity maybeStrength nt1 nt2 force = do
135 nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
136 let
137 graph = nodeGraph ^. node_hyperdata . hyperdataGraph
138 camera = nodeGraph ^. node_hyperdata . hyperdataCamera
139 graphMetadata = graph ^? _Just . graph_metadata . _Just
140 listVersion = graph ^? _Just . graph_metadata . _Just . gm_list . lfg_version
141 graphMetric = case maybeSimilarity of
142 Nothing -> graph ^? _Just . graph_metadata . _Just . gm_metric
143 _ -> maybeSimilarity
144 similarity = case graphMetric of
145 Nothing -> withMetric Order1
146 Just m -> withMetric m
147
148 strength = case maybeStrength of
149 Nothing -> case graph ^? _Just . graph_metadata . _Just . gm_edgesStrength of
150 Nothing -> Strong
151 Just mr -> fromMaybe Strong mr
152 Just r -> r
153
154 mcId <- getClosestParentIdByType nId NodeCorpus
155 let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
156
157 listId <- defaultList cId
158 repo <- getRepo [listId]
159 let v = repo ^. unNodeStory . at listId . _Just . a_version
160
161 let computeG mt = do
162 !g <- computeGraph cId method similarity strength (nt1,nt2) repo
163 let g' = set graph_metadata mt g
164 _nentries <- updateHyperdata nId (HyperdataGraph (Just g') camera)
165 pure g'
166
167 case graph of
168 Nothing -> do
169 mt <- defaultGraphMetadata cId "Title" repo (fromMaybe Order1 maybeSimilarity) strength
170 g <- computeG $ Just mt
171 pure $ trace "[G.V.G.API.recomputeGraph] Graph empty, computed" g
172 Just graph' -> if (listVersion == Just v) && (not force)
173 then pure graph'
174 else do
175 g <- computeG graphMetadata
176 pure $ trace "[G.V.G.API] Graph exists, recomputing" g
177
178
179 -- TODO remove repo
180 computeGraph :: FlowCmdM env err m
181 => CorpusId
182 -> PartitionMethod
183 -> Similarity
184 -> Strength
185 -> (NgramsType, NgramsType)
186 -> NodeListStory
187 -> m Graph
188 computeGraph corpusId method similarity strength (nt1,nt2) repo = do
189 -- Getting the Node parameters
190 lId <- defaultList corpusId
191 lIds <- selectNodesWithUsername NodeList userMaster
192
193 -- Getting the Ngrams to compute with and grouping it according to the lists
194 let
195 groupedContextsByNgrams nt corpusId' (lists_master, lists_user) = do
196 let
197 ngs = filterListWithRoot [MapTerm] $ mapTermListRoot lists_user nt repo
198 groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser corpusId'
199 (lists_user <> lists_master) nt (HashMap.keys ngs)
200
201 -- Optim if nt1 == nt2 : do not compute twice
202 (m1,m2) <- do
203 m1 <- groupedContextsByNgrams nt1 corpusId (lIds, [lId])
204 if nt1 == nt2
205 then
206 pure (m1,m1)
207 else do
208 m2 <- groupedContextsByNgrams nt2 corpusId (lIds, [lId])
209 pure (m1,m2)
210
211 -- Removing the hapax (ngrams with 1 cooc)
212 let !myCooc = HashMap.filter (>1)
213 $ getCoocByNgrams'' (Diagonal True) (identity, identity) (m1,m2)
214
215 -- TODO MultiPartite Here
216 graph <- liftBase
217 $ cooc2graphWith method (MultiPartite (Partite (HashMap.keysSet m1) nt1)
218 (Partite (HashMap.keysSet m2) nt2)
219 )
220 similarity 0 strength myCooc
221
222 pure graph
223
224
225
226 defaultGraphMetadata :: HasNodeError err
227 => CorpusId
228 -> Text
229 -> NodeListStory
230 -> GraphMetric
231 -> Strength
232 -> Cmd err GraphMetadata
233 defaultGraphMetadata cId t repo gm str = do
234 lId <- defaultList cId
235
236 pure $ GraphMetadata { _gm_title = t
237 , _gm_metric = gm
238 , _gm_edgesStrength = Just str
239 , _gm_corpusId = [cId]
240 , _gm_legend = [
241 LegendField 1 "#FFF" "Cluster1"
242 , LegendField 2 "#FFF" "Cluster2"
243 , LegendField 3 "#FFF" "Cluster3"
244 , LegendField 4 "#FFF" "Cluster4"
245 ]
246 , _gm_list = (ListForGraph lId (repo ^. unNodeStory . at lId . _Just . a_version))
247 , _gm_startForceAtlas = True
248 }
249 -- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
250
251 ------------------------------------------------------------
252 type GraphAsyncAPI = Summary "Recompute graph"
253 :> "recompute"
254 :> AsyncJobsAPI JobLog () JobLog
255
256
257 graphAsync :: UserId -> NodeId -> ServerT GraphAsyncAPI (GargM Env GargError)
258 graphAsync u n =
259 serveJobsAPI RecomputeGraphJob $ \_ log' ->
260 graphRecompute u n (liftBase . log')
261
262
263 --graphRecompute :: UserId
264 -- -> NodeId
265 -- -> (JobLog -> GargNoServer ())
266 -- -> GargNoServer JobLog
267 -- TODO get Graph Metadata to recompute
268 graphRecompute :: FlowCmdM env err m
269 => UserId
270 -> NodeId
271 -> (JobLog -> m ())
272 -> m JobLog
273 graphRecompute u n logStatus = do
274 logStatus JobLog { _scst_succeeded = Just 0
275 , _scst_failed = Just 0
276 , _scst_remaining = Just 1
277 , _scst_events = Just []
278 }
279 _g <- recomputeGraph u n Spinglass Nothing Nothing NgramsTerms NgramsTerms False
280 pure JobLog { _scst_succeeded = Just 1
281 , _scst_failed = Just 0
282 , _scst_remaining = Just 0
283 , _scst_events = Just []
284 }
285
286 ------------------------------------------------------------
287 type GraphVersionsAPI = Summary "Graph versions"
288 :> Get '[JSON] GraphVersions
289 :<|> Summary "Recompute graph version"
290 :> Post '[JSON] Graph
291
292 graphVersionsAPI :: UserId -> NodeId -> GargServer GraphVersionsAPI
293 graphVersionsAPI u n =
294 graphVersions 0 n
295 :<|> recomputeVersions u n
296
297 graphVersions :: Int -> NodeId -> GargNoServer GraphVersions
298 graphVersions n nId = do
299 nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
300 let
301 graph = nodeGraph
302 ^. node_hyperdata
303 . hyperdataGraph
304
305 listVersion = graph
306 ^? _Just
307 . graph_metadata
308 . _Just
309 . gm_list
310 . lfg_version
311
312 mcId <- getClosestParentIdByType nId NodeCorpus
313 let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
314
315 maybeListId <- defaultListMaybe cId
316 case maybeListId of
317 Nothing -> if n <= 2
318 then graphVersions (n+1) cId
319 else panic "[G.V.G.API] list not found after iterations"
320
321 Just listId -> do
322 repo <- getRepo [listId]
323 let v = repo ^. unNodeStory . at listId . _Just . a_version
324 -- printDebug "graphVersions" v
325
326 pure $ GraphVersions { gv_graph = listVersion
327 , gv_repo = v }
328
329 --recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
330 recomputeVersions :: FlowCmdM env err m
331 => UserId
332 -> NodeId
333 -> m Graph
334 recomputeVersions uId nId = recomputeGraph uId nId Spinglass Nothing Nothing NgramsTerms NgramsTerms False
335
336 ------------------------------------------------------------
337 graphClone :: UserId
338 -> NodeId
339 -> HyperdataGraphAPI
340 -> GargNoServer NodeId
341 graphClone uId pId (HyperdataGraphAPI { _hyperdataAPIGraph = graph
342 , _hyperdataAPICamera = camera }) = do
343 let nodeType = NodeGraph
344 nodeUser <- getNodeUser (NodeId uId)
345 nodeParent <- getNodeWith pId (Proxy :: Proxy HyperdataGraph)
346 let uId' = nodeUser ^. node_user_id
347 nIds <- mkNodeWithParent nodeType (Just pId) uId' $ nodeParent ^. node_name
348 case nIds of
349 [] -> pure pId
350 (nId:_) -> do
351 let graphP = graph
352 let graphP' = set (graph_metadata . _Just . gm_startForceAtlas) False graphP
353
354 _ <- updateHyperdata nId (HyperdataGraph (Just graphP') camera)
355
356 pure nId
357
358 ------------------------------------------------------------
359 --getGraphGexf :: UserId
360 -- -> NodeId
361 -- -> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
362 getGraphGexf :: FlowCmdM env err m
363 => UserId
364 -> NodeId
365 -> m (Headers '[Servant.Header "Content-Disposition" Text] Graph)
366 getGraphGexf uId nId = do
367 HyperdataGraphAPI { _hyperdataAPIGraph = graph } <- getGraph uId nId
368 pure $ addHeader "attachment; filename=graph.gexf" graph
369