]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FUN] toGroupedText before toGroupedTextTree.
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TypeOperators #-}
22 {-# LANGUAGE TypeFamilies #-}
23
24 module Gargantext.API.Ngrams
25 ( TableNgramsApi
26 , TableNgramsApiGet
27 , TableNgramsApiPut
28
29 , getTableNgrams
30 , setListNgrams
31 --, rmListNgrams TODO fix before exporting
32 , apiNgramsTableCorpus
33 , apiNgramsTableDoc
34
35 , NgramsStatePatch
36 , NgramsTablePatch
37 , NgramsTableMap
38
39 , NgramsTerm(..)
40
41 , NgramsElement(..)
42 , mkNgramsElement
43
44 , RootParent(..)
45
46 , MSet
47 , mSetFromList
48 , mSetToList
49
50 , Repo(..)
51 , r_version
52 , r_state
53 , r_history
54 , NgramsRepo
55 , NgramsRepoElement(..)
56 , saveRepo
57 , initRepo
58
59 , RepoEnv(..)
60 , renv_var
61 , renv_lock
62
63 , TabType(..)
64
65 , HasRepoVar(..)
66 , HasRepoSaver(..)
67 , HasRepo(..)
68 , RepoCmdM
69 , QueryParamR
70 , TODO
71
72 -- Internals
73 , getNgramsTableMap
74 , dumpJsonTableMap
75 , tableNgramsPull
76 , tableNgramsPut
77
78 , Version
79 , Versioned(..)
80 , currentVersion
81 , listNgramsChangedSince
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import qualified Data.Aeson.Text as DAT
90 import Data.Either (Either(..))
91 import Data.Foldable
92 import qualified Data.List as List
93 import Data.Map.Strict (Map)
94 import qualified Data.Map.Strict as Map
95 import qualified Data.Map.Strict.Patch as PM
96 import Data.Maybe (fromMaybe)
97 import Data.Monoid
98 import Data.Ord (Down(..))
99 import Data.Patch.Class (Action(act), Transformable(..), ours)
100 import qualified Data.Set as S
101 import qualified Data.Set as Set
102 import Data.Swagger hiding (version, patch)
103 import Data.Text (Text, isInfixOf, unpack)
104 import Data.Text.Lazy.IO as DTL
105 import Formatting (hprint, int, (%))
106 import Formatting.Clock (timeSpecs)
107 import GHC.Generics (Generic)
108 import Servant hiding (Patch)
109 import System.Clock (getTime, TimeSpec, Clock(..))
110 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
111 import System.IO (stderr)
112 import Test.QuickCheck (elements)
113 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
114
115 import Prelude (error)
116 import Gargantext.Prelude hiding (log)
117
118 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
119 import Gargantext.API.Admin.Types (HasSettings)
120 import qualified Gargantext.API.Metrics as Metrics
121 import Gargantext.API.Ngrams.Types
122 import Gargantext.API.Prelude
123 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
124 import Gargantext.Core.Utils (something)
125 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
126 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
127 import Gargantext.Database.Action.Flow.Types
128 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
129 import Gargantext.Database.Admin.Config (userMaster)
130 import Gargantext.Database.Admin.Types.Node (NodeType(..))
131 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
132 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
133 import Gargantext.Database.Query.Table.Node.Select
134 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136 import Gargantext.Database.Query.Table.Node (getNode)
137 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
138 import Gargantext.Prelude.Job
139
140 {-
141 -- TODO sequences of modifications (Patchs)
142 type NgramsIdPatch = Patch NgramsId NgramsPatch
143
144 ngramsPatch :: Int -> NgramsPatch
145 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
146
147 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
148 toEdit n p = Edit n p
149 ngramsIdPatch :: Patch NgramsId NgramsPatch
150 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
151 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
152 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
153 ]
154
155 -- applyPatchBack :: Patch -> IO Patch
156 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
157 -}
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
160 ------------------------------------------------------------------------
161
162 {-
163 -- TODO: Replace.old is ignored which means that if the current list
164 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
165 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
166 -- However this should not happen in non conflicting situations.
167 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
168 mkListsUpdate nt patches =
169 [ (ngramsTypeId nt, ng, listTypeId lt)
170 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
171 , lt <- patch ^.. patch_list . new
172 ]
173
174 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
175 -> NgramsType
176 -> NgramsTablePatch
177 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
178 mkChildrenGroups addOrRem nt patches =
179 [ (ngramsTypeId nt, parent, child)
180 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
181 , child <- patch ^.. patch_children . to addOrRem . folded
182 ]
183 -}
184
185 ------------------------------------------------------------------------
186
187 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
188 => m ()
189 saveRepo = liftBase =<< view repoSaver
190
191 listTypeConflictResolution :: ListType -> ListType -> ListType
192 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
193
194 ngramsStatePatchConflictResolution
195 :: TableNgrams.NgramsType
196 -> NodeId
197 -> NgramsTerm
198 -> ConflictResolutionNgramsPatch
199 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
200 = (ours, (const ours, ours), (False, False))
201 -- (False, False) mean here that Mod has always priority.
202 -- (True, False) <- would mean priority to the left (same as ours).
203
204 -- undefined {- TODO think this through -}, listTypeConflictResolution)
205
206 -- Current state:
207 -- Insertions are not considered as patches,
208 -- they do not extend history,
209 -- they do not bump version.
210 insertNewOnly :: a -> Maybe b -> a
211 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
212 -- TODO error handling
213
214 {- unused
215 -- TODO refactor with putListNgrams
216 copyListNgrams :: RepoCmdM env err m
217 => NodeId -> NodeId -> NgramsType
218 -> m ()
219 copyListNgrams srcListId dstListId ngramsType = do
220 var <- view repoVar
221 liftBase $ modifyMVar_ var $
222 pure . (r_state . at ngramsType %~ (Just . f . something))
223 saveRepo
224 where
225 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
226 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
227
228 -- TODO refactor with putListNgrams
229 -- The list must be non-empty!
230 -- The added ngrams must be non-existent!
231 addListNgrams :: RepoCmdM env err m
232 => NodeId -> NgramsType
233 -> [NgramsElement] -> m ()
234 addListNgrams listId ngramsType nes = do
235 var <- view repoVar
236 liftBase $ modifyMVar_ var $
237 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
238 saveRepo
239 where
240 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
241 -}
242
243 -- UNSAFE
244 rmListNgrams :: RepoCmdM env err m
245 => ListId
246 -> TableNgrams.NgramsType
247 -> m ()
248 rmListNgrams l nt = setListNgrams l nt mempty
249
250 -- | TODO: incr the Version number
251 -- && should use patch
252 -- UNSAFE
253 setListNgrams :: RepoCmdM env err m
254 => NodeId
255 -> TableNgrams.NgramsType
256 -> Map NgramsTerm NgramsRepoElement
257 -> m ()
258 setListNgrams listId ngramsType ns = do
259 var <- view repoVar
260 liftBase $ modifyMVar_ var $
261 pure . ( r_state
262 . at ngramsType %~
263 (Just .
264 (at listId .~ ( Just ns))
265 . something
266 )
267 )
268 saveRepo
269
270
271 currentVersion :: RepoCmdM env err m
272 => m Version
273 currentVersion = do
274 var <- view repoVar
275 r <- liftBase $ readMVar var
276 pure $ r ^. r_version
277
278 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
279 newNgramsFromNgramsStatePatch p =
280 [ text2ngrams (unNgramsTerm n)
281 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
282 , _ <- np ^.. patch_new . _Just
283 ]
284
285 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
286 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
287 commitStatePatch (Versioned p_version p) = do
288 var <- view repoVar
289 vq' <- liftBase $ modifyMVar var $ \r -> do
290 let
291 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
292 (p', q') = transformWith ngramsStatePatchConflictResolution p q
293 r' = r & r_version +~ 1
294 & r_state %~ act p'
295 & r_history %~ (p' :)
296 {-
297 -- Ideally we would like to check these properties. However:
298 -- * They should be checked only to debug the code. The client data
299 -- should be able to trigger these.
300 -- * What kind of error should they throw (we are in IO here)?
301 -- * Should we keep modifyMVar?
302 -- * Should we throw the validation in an Exception, catch it around
303 -- modifyMVar and throw it back as an Error?
304 assertValid $ transformable p q
305 assertValid $ applicable p' (r ^. r_state)
306 -}
307 pure (r', Versioned (r' ^. r_version) q')
308
309 saveRepo
310 pure vq'
311
312 -- This is a special case of tableNgramsPut where the input patch is empty.
313 tableNgramsPull :: RepoCmdM env err m
314 => ListId
315 -> TableNgrams.NgramsType
316 -> Version
317 -> m (Versioned NgramsTablePatch)
318 tableNgramsPull listId ngramsType p_version = do
319 var <- view repoVar
320 r <- liftBase $ readMVar var
321
322 let
323 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
324 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
325
326 pure (Versioned (r ^. r_version) q_table)
327
328 -- Apply the given patch to the DB and returns the patch to be applied on the
329 -- client.
330 -- TODO-ACCESS check
331 tableNgramsPut :: ( FlowCmdM env err m
332 , HasSettings env
333 )
334 => TabType
335 -> ListId
336 -> Versioned NgramsTablePatch
337 -> m (Versioned NgramsTablePatch)
338 tableNgramsPut tabType listId (Versioned p_version p_table)
339 | p_table == mempty = do
340 let ngramsType = ngramsTypeFromTabType tabType
341 tableNgramsPull listId ngramsType p_version
342
343 | otherwise = do
344 let ngramsType = ngramsTypeFromTabType tabType
345 (p0, p0_validity) = PM.singleton listId p_table
346 (p, p_validity) = PM.singleton ngramsType p0
347
348 assertValid p0_validity
349 assertValid p_validity
350
351 ret <- commitStatePatch (Versioned p_version p)
352 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
353
354 pure ret
355
356
357 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
358 , HasNodeError err
359 , HasSettings env
360 )
361 => UpdateTableNgramsCharts
362 -> (JobLog -> m ())
363 -> m JobLog
364 tableNgramsPostChartsAsync utn logStatus = do
365 let tabType = utn ^. utn_tab_type
366 let listId = utn ^. utn_list_id
367
368 node <- getNode listId
369 let nId = node ^. node_id
370 _uId = node ^. node_userId
371 mCId = node ^. node_parentId
372
373 printDebug "[tableNgramsPut] tabType" tabType
374 printDebug "[tableNgramsPut] listId" listId
375
376 case mCId of
377 Nothing -> do
378 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
379 pure $ jobLogFail $ jobLogInit 1
380 Just cId -> do
381 case tabType of
382 Authors -> do
383 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
384 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
385 logRef
386 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
387 logRefSuccess
388
389 getRef
390 Institutes -> do
391 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
392 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
393 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
394 logRef
395 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
396 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
397 logRefSuccess
398 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
399 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
400 logRefSuccess
401 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
402 logRefSuccess
403
404 getRef
405 Sources -> do
406 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
407 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
408 logRef
409 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
410 logRefSuccess
411
412 getRef
413 Terms -> do
414 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
415 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
416 logRef
417 {-
418 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
419 logRefSuccess
420 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
421 logRefSuccess
422 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
423 logRefSuccess
424 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
425 logRefSuccess
426 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
427 logRefSuccess
428 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
429 -}
430 logRefSuccess
431
432 getRef
433 _ -> do
434 printDebug "[tableNgramsPut] no update for tabType = " tabType
435 pure $ jobLogFail $ jobLogInit 1
436
437 {-
438 { _ne_list :: ListType
439 If we merge the parents/children we can potentially create cycles!
440 , _ne_parent :: Maybe NgramsTerm
441 , _ne_children :: MSet NgramsTerm
442 }
443 -}
444
445 getNgramsTableMap :: RepoCmdM env err m
446 => NodeId
447 -> TableNgrams.NgramsType
448 -> m (Versioned NgramsTableMap)
449 getNgramsTableMap nodeId ngramsType = do
450 v <- view repoVar
451 repo <- liftBase $ readMVar v
452 pure $ Versioned (repo ^. r_version)
453 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
454
455 dumpJsonTableMap :: RepoCmdM env err m
456 => Text
457 -> NodeId
458 -> TableNgrams.NgramsType
459 -> m ()
460 dumpJsonTableMap fpath nodeId ngramsType = do
461 m <- getNgramsTableMap nodeId ngramsType
462 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
463 pure ()
464
465 type MinSize = Int
466 type MaxSize = Int
467
468 -- | TODO Errors management
469 -- TODO: polymorphic for Annuaire or Corpus or ...
470 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
471 -- TODO: should take only one ListId
472
473 getTime' :: MonadBase IO m => m TimeSpec
474 getTime' = liftBase $ getTime ProcessCPUTime
475
476
477 getTableNgrams :: forall env err m.
478 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
479 => NodeType -> NodeId -> TabType
480 -> ListId -> Limit -> Maybe Offset
481 -> Maybe ListType
482 -> Maybe MinSize -> Maybe MaxSize
483 -> Maybe OrderBy
484 -> (NgramsTerm -> Bool)
485 -> m (Versioned NgramsTable)
486 getTableNgrams _nType nId tabType listId limit_ offset
487 listType minSize maxSize orderBy searchQuery = do
488
489 t0 <- getTime'
490 -- lIds <- selectNodesWithUsername NodeList userMaster
491 let
492 ngramsType = ngramsTypeFromTabType tabType
493 offset' = maybe 0 identity offset
494 listType' = maybe (const True) (==) listType
495 minSize' = maybe (const True) (<=) minSize
496 maxSize' = maybe (const True) (>=) maxSize
497
498 selected_node n = minSize' s
499 && maxSize' s
500 && searchQuery (n ^. ne_ngrams)
501 && listType' (n ^. ne_list)
502 where
503 s = n ^. ne_size
504
505 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
506
507 ---------------------------------------
508 sortOnOrder Nothing = identity
509 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
510 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
511 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
512 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
513
514 ---------------------------------------
515 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
516 selectAndPaginate tableMap = roots <> inners
517 where
518 list = tableMap ^.. each
519 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
520 (ne ^. ne_root)
521 selected_nodes = list & take limit_
522 . drop offset'
523 . filter selected_node
524 . sortOnOrder orderBy
525 roots = rootOf <$> selected_nodes
526 rootsSet = Set.fromList (_ne_ngrams <$> roots)
527 inners = list & filter (selected_inner rootsSet)
528
529 ---------------------------------------
530 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
531 setScores False table = pure table
532 setScores True table = do
533 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
534 t1 <- getTime'
535 occurrences <- getOccByNgramsOnlyFast' nId
536 listId
537 ngramsType
538 ngrams_terms
539 t2 <- getTime'
540 liftBase $ hprint stderr
541 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
542 (length ngrams_terms) t1 t2
543 {-
544 occurrences <- getOccByNgramsOnlySlow nType nId
545 (lIds <> [listId])
546 ngramsType
547 ngrams_terms
548 -}
549 let
550 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
551
552 pure $ table & each %~ setOcc
553 ---------------------------------------
554
555 -- lists <- catMaybes <$> listsWith userMaster
556 -- trace (show lists) $
557 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
558
559 let scoresNeeded = needsScores orderBy
560 tableMap1 <- getNgramsTableMap listId ngramsType
561 t1 <- getTime'
562 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
563 . Map.mapWithKey ngramsElementFromRepo
564 t2 <- getTime'
565 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
566 . setScores (not scoresNeeded)
567 . selectAndPaginate
568 t3 <- getTime'
569 liftBase $ hprint stderr
570 ("getTableNgrams total=" % timeSpecs
571 % " map1=" % timeSpecs
572 % " map2=" % timeSpecs
573 % " map3=" % timeSpecs
574 % " sql=" % (if scoresNeeded then "map2" else "map3")
575 % "\n"
576 ) t0 t3 t0 t1 t1 t2 t2 t3
577 pure tableMap3
578
579
580 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
581 scoresRecomputeTableNgrams nId tabType listId = do
582 tableMap <- getNgramsTableMap listId ngramsType
583 _ <- tableMap & v_data %%~ setScores
584 . Map.mapWithKey ngramsElementFromRepo
585
586 pure $ 1
587 where
588 ngramsType = ngramsTypeFromTabType tabType
589
590 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
591 setScores table = do
592 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
593 occurrences <- getOccByNgramsOnlyFast' nId
594 listId
595 ngramsType
596 ngrams_terms
597 let
598 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
599
600 pure $ table & each %~ setOcc
601
602
603
604 -- APIs
605
606 -- TODO: find a better place for the code above, All APIs stay here
607
608 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
609 deriving (Generic, Enum, Bounded, Read, Show)
610
611 instance FromHttpApiData OrderBy
612 where
613 parseUrlPiece "TermAsc" = pure TermAsc
614 parseUrlPiece "TermDesc" = pure TermDesc
615 parseUrlPiece "ScoreAsc" = pure ScoreAsc
616 parseUrlPiece "ScoreDesc" = pure ScoreDesc
617 parseUrlPiece _ = Left "Unexpected value of OrderBy"
618
619
620 instance ToParamSchema OrderBy
621 instance FromJSON OrderBy
622 instance ToJSON OrderBy
623 instance ToSchema OrderBy
624 instance Arbitrary OrderBy
625 where
626 arbitrary = elements [minBound..maxBound]
627
628 needsScores :: Maybe OrderBy -> Bool
629 needsScores (Just ScoreAsc) = True
630 needsScores (Just ScoreDesc) = True
631 needsScores _ = False
632
633 type TableNgramsApiGet = Summary " Table Ngrams API Get"
634 :> QueryParamR "ngramsType" TabType
635 :> QueryParamR "list" ListId
636 :> QueryParamR "limit" Limit
637 :> QueryParam "offset" Offset
638 :> QueryParam "listType" ListType
639 :> QueryParam "minTermSize" MinSize
640 :> QueryParam "maxTermSize" MaxSize
641 :> QueryParam "orderBy" OrderBy
642 :> QueryParam "search" Text
643 :> Get '[JSON] (Versioned NgramsTable)
644
645 type TableNgramsApiPut = Summary " Table Ngrams API Change"
646 :> QueryParamR "ngramsType" TabType
647 :> QueryParamR "list" ListId
648 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
649 :> Put '[JSON] (Versioned NgramsTablePatch)
650
651 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
652 :> QueryParamR "ngramsType" TabType
653 :> QueryParamR "list" ListId
654 :> "recompute" :> Post '[JSON] Int
655
656 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
657 :> QueryParamR "ngramsType" TabType
658 :> QueryParamR "list" ListId
659 :> Get '[JSON] Version
660
661 type TableNgramsApi = TableNgramsApiGet
662 :<|> TableNgramsApiPut
663 :<|> RecomputeScoresNgramsApiGet
664 :<|> "version" :> TableNgramsApiGetVersion
665 :<|> TableNgramsAsyncApi
666
667 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
668 :> "async"
669 :> "charts"
670 :> "update"
671 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
672
673 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
674 => NodeId
675 -> TabType
676 -> ListId
677 -> Limit
678 -> Maybe Offset
679 -> Maybe ListType
680 -> Maybe MinSize -> Maybe MaxSize
681 -> Maybe OrderBy
682 -> Maybe Text -- full text search
683 -> m (Versioned NgramsTable)
684 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
685 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
686 where
687 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
688
689 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
690 => NodeId
691 -> TabType
692 -> ListId
693 -> m Version
694 getTableNgramsVersion _nId _tabType _listId = currentVersion
695 -- TODO: limit?
696 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
697 -- This line above looks like a waste of computation to finally get only the version.
698 -- See the comment about listNgramsChangedSince.
699
700
701 -- | Text search is deactivated for now for ngrams by doc only
702 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
703 => DocId -> TabType
704 -> ListId -> Limit -> Maybe Offset
705 -> Maybe ListType
706 -> Maybe MinSize -> Maybe MaxSize
707 -> Maybe OrderBy
708 -> Maybe Text -- full text search
709 -> m (Versioned NgramsTable)
710 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
711 ns <- selectNodesWithUsername NodeList userMaster
712 let ngramsType = ngramsTypeFromTabType tabType
713 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
714 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
715 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
716
717
718
719 apiNgramsTableCorpus :: ( GargServerC env err m
720 )
721 => NodeId -> ServerT TableNgramsApi m
722 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
723 :<|> tableNgramsPut
724 :<|> scoresRecomputeTableNgrams cId
725 :<|> getTableNgramsVersion cId
726 :<|> apiNgramsAsync cId
727
728 apiNgramsTableDoc :: ( GargServerC env err m
729 )
730 => DocId -> ServerT TableNgramsApi m
731 apiNgramsTableDoc dId = getTableNgramsDoc dId
732 :<|> tableNgramsPut
733 :<|> scoresRecomputeTableNgrams dId
734 :<|> getTableNgramsVersion dId
735 :<|> apiNgramsAsync dId
736 -- > index all the corpus accordingly (TODO AD)
737
738 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
739 apiNgramsAsync _dId =
740 serveJobsAPI $
741 JobFunction $ \i log ->
742 let
743 log' x = do
744 printDebug "tableNgramsPostChartsAsync" x
745 liftBase $ log x
746 in tableNgramsPostChartsAsync i log'
747
748 -- Did the given list of ngrams changed since the given version?
749 -- The returned value is versioned boolean value, meaning that one always retrieve the
750 -- latest version.
751 -- If the given version is negative then one simply receive the latest version and True.
752 -- Using this function is more precise than simply comparing the latest version number
753 -- with the local version number. Indeed there might be no change to this particular list
754 -- and still the version number has changed because of other lists.
755 --
756 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
757 -- * currentVersion: good computation, good bandwidth, bad precision.
758 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
759 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
760 listNgramsChangedSince :: RepoCmdM env err m
761 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
762 listNgramsChangedSince listId ngramsType version
763 | version < 0 =
764 Versioned <$> currentVersion <*> pure True
765 | otherwise =
766 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)