]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext into dev
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TypeOperators #-}
22 {-# LANGUAGE TypeFamilies #-}
23
24 module Gargantext.API.Ngrams
25 ( TableNgramsApi
26 , TableNgramsApiGet
27 , TableNgramsApiPut
28
29 , getTableNgrams
30 , setListNgrams
31 --, rmListNgrams TODO fix before exporting
32 , apiNgramsTableCorpus
33 , apiNgramsTableDoc
34
35 , NgramsStatePatch
36 , NgramsTablePatch
37 , NgramsTableMap
38
39 , NgramsTerm(..)
40
41 , NgramsElement(..)
42 , mkNgramsElement
43
44 , RootParent(..)
45
46 , MSet
47 , mSetFromList
48 , mSetToList
49
50 , Repo(..)
51 , r_version
52 , r_state
53 , r_history
54 , NgramsRepo
55 , NgramsRepoElement(..)
56 , saveRepo
57 , initRepo
58
59 , RepoEnv(..)
60 , renv_var
61 , renv_lock
62
63 , TabType(..)
64
65 , HasRepoVar(..)
66 , HasRepoSaver(..)
67 , HasRepo(..)
68 , RepoCmdM
69 , QueryParamR
70 , TODO
71
72 -- Internals
73 , getNgramsTableMap
74 , dumpJsonTableMap
75 , tableNgramsPull
76 , tableNgramsPut
77
78 , Version
79 , Versioned(..)
80 , currentVersion
81 , listNgramsChangedSince
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import qualified Data.Aeson.Text as DAT
90 import Data.Either (Either(..))
91 import Data.Foldable
92 import qualified Data.List as List
93 import Data.Map.Strict (Map)
94 import qualified Data.Map.Strict as Map
95 import qualified Data.Map.Strict.Patch as PM
96 import Data.Maybe (fromMaybe)
97 import Data.Monoid
98 import Data.Ord (Down(..))
99 import Data.Patch.Class (Action(act), Transformable(..), ours)
100 import qualified Data.Set as S
101 import qualified Data.Set as Set
102 import Data.Swagger hiding (version, patch)
103 import Data.Text (Text, isInfixOf, unpack)
104 import Data.Text.Lazy.IO as DTL
105 import Formatting (hprint, int, (%))
106 import Formatting.Clock (timeSpecs)
107 import GHC.Generics (Generic)
108 import Servant hiding (Patch)
109 import System.Clock (getTime, TimeSpec, Clock(..))
110 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
111 import System.IO (stderr)
112 import Test.QuickCheck (elements)
113 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
114
115 import Prelude (error)
116 import Gargantext.Prelude hiding (log)
117
118 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
119 import Gargantext.API.Admin.Types (HasSettings)
120 import qualified Gargantext.API.Metrics as Metrics
121 import Gargantext.API.Ngrams.Types
122 import Gargantext.API.Prelude
123 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
124 import Gargantext.Core.Utils (something)
125 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
126 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
127 import Gargantext.Database.Action.Flow.Types
128 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
129 import Gargantext.Database.Admin.Config (userMaster)
130 import Gargantext.Database.Admin.Types.Node (NodeType(..))
131 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
132 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
133 import Gargantext.Database.Query.Table.Node.Select
134 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136 import Gargantext.Database.Query.Table.Node (getNode)
137 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
138 import Gargantext.Prelude.Job
139
140 {-
141 -- TODO sequences of modifications (Patchs)
142 type NgramsIdPatch = Patch NgramsId NgramsPatch
143
144 ngramsPatch :: Int -> NgramsPatch
145 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
146
147 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
148 toEdit n p = Edit n p
149 ngramsIdPatch :: Patch NgramsId NgramsPatch
150 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
151 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
152 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
153 ]
154
155 -- applyPatchBack :: Patch -> IO Patch
156 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
157 -}
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
160 ------------------------------------------------------------------------
161
162 {-
163 -- TODO: Replace.old is ignored which means that if the current list
164 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
165 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
166 -- However this should not happen in non conflicting situations.
167 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
168 mkListsUpdate nt patches =
169 [ (ngramsTypeId nt, ng, listTypeId lt)
170 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
171 , lt <- patch ^.. patch_list . new
172 ]
173
174 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
175 -> NgramsType
176 -> NgramsTablePatch
177 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
178 mkChildrenGroups addOrRem nt patches =
179 [ (ngramsTypeId nt, parent, child)
180 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
181 , child <- patch ^.. patch_children . to addOrRem . folded
182 ]
183 -}
184
185 ------------------------------------------------------------------------
186
187 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
188 => m ()
189 saveRepo = liftBase =<< view repoSaver
190
191 listTypeConflictResolution :: ListType -> ListType -> ListType
192 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
193
194 ngramsStatePatchConflictResolution
195 :: TableNgrams.NgramsType
196 -> NodeId
197 -> NgramsTerm
198 -> ConflictResolutionNgramsPatch
199 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
200 = (ours, (const ours, ours), (False, False))
201 -- (False, False) mean here that Mod has always priority.
202 -- (True, False) <- would mean priority to the left (same as ours).
203
204 -- undefined {- TODO think this through -}, listTypeConflictResolution)
205
206 -- Current state:
207 -- Insertions are not considered as patches,
208 -- they do not extend history,
209 -- they do not bump version.
210 insertNewOnly :: a -> Maybe b -> a
211 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
212 -- TODO error handling
213
214 {- unused
215 -- TODO refactor with putListNgrams
216 copyListNgrams :: RepoCmdM env err m
217 => NodeId -> NodeId -> NgramsType
218 -> m ()
219 copyListNgrams srcListId dstListId ngramsType = do
220 var <- view repoVar
221 liftBase $ modifyMVar_ var $
222 pure . (r_state . at ngramsType %~ (Just . f . something))
223 saveRepo
224 where
225 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
226 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
227
228 -- TODO refactor with putListNgrams
229 -- The list must be non-empty!
230 -- The added ngrams must be non-existent!
231 addListNgrams :: RepoCmdM env err m
232 => NodeId -> NgramsType
233 -> [NgramsElement] -> m ()
234 addListNgrams listId ngramsType nes = do
235 var <- view repoVar
236 liftBase $ modifyMVar_ var $
237 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
238 saveRepo
239 where
240 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
241 -}
242
243 -- UNSAFE
244 rmListNgrams :: RepoCmdM env err m
245 => ListId
246 -> TableNgrams.NgramsType
247 -> m ()
248 rmListNgrams l nt = setListNgrams l nt mempty
249
250 -- | TODO: incr the Version number
251 -- && should use patch
252 -- UNSAFE
253 setListNgrams :: RepoCmdM env err m
254 => NodeId
255 -> TableNgrams.NgramsType
256 -> Map NgramsTerm NgramsRepoElement
257 -> m ()
258 setListNgrams listId ngramsType ns = do
259 var <- view repoVar
260 liftBase $ modifyMVar_ var $
261 pure . ( r_state
262 . at ngramsType %~
263 (Just .
264 (at listId .~ ( Just ns))
265 . something
266 )
267 )
268 saveRepo
269
270
271 currentVersion :: RepoCmdM env err m
272 => m Version
273 currentVersion = do
274 var <- view repoVar
275 r <- liftBase $ readMVar var
276 pure $ r ^. r_version
277
278
279 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
280 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
281 commitStatePatch (Versioned p_version p) = do
282 var <- view repoVar
283 vq' <- liftBase $ modifyMVar var $ \r -> do
284 let
285 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
286 (p', q') = transformWith ngramsStatePatchConflictResolution p q
287 r' = r & r_version +~ 1
288 & r_state %~ act p'
289 & r_history %~ (p' :)
290 {-
291 -- Ideally we would like to check these properties. However:
292 -- * They should be checked only to debug the code. The client data
293 -- should be able to trigger these.
294 -- * What kind of error should they throw (we are in IO here)?
295 -- * Should we keep modifyMVar?
296 -- * Should we throw the validation in an Exception, catch it around
297 -- modifyMVar and throw it back as an Error?
298 assertValid $ transformable p q
299 assertValid $ applicable p' (r ^. r_state)
300 -}
301 pure (r', Versioned (r' ^. r_version) q')
302
303 saveRepo
304 pure vq'
305
306 -- This is a special case of tableNgramsPut where the input patch is empty.
307 tableNgramsPull :: RepoCmdM env err m
308 => ListId
309 -> TableNgrams.NgramsType
310 -> Version
311 -> m (Versioned NgramsTablePatch)
312 tableNgramsPull listId ngramsType p_version = do
313 var <- view repoVar
314 r <- liftBase $ readMVar var
315
316 let
317 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
318 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
319
320 pure (Versioned (r ^. r_version) q_table)
321
322 -- Apply the given patch to the DB and returns the patch to be applied on the
323 -- client.
324 -- TODO-ACCESS check
325 tableNgramsPut :: ( FlowCmdM env err m
326 , HasSettings env
327 )
328 => TabType
329 -> ListId
330 -> Versioned NgramsTablePatch
331 -> m (Versioned NgramsTablePatch)
332 tableNgramsPut tabType listId (Versioned p_version p_table)
333 | p_table == mempty = do
334 let ngramsType = ngramsTypeFromTabType tabType
335 tableNgramsPull listId ngramsType p_version
336
337 | otherwise = do
338 let ngramsType = ngramsTypeFromTabType tabType
339 (p0, p0_validity) = PM.singleton listId p_table
340 (p, p_validity) = PM.singleton ngramsType p0
341
342 assertValid p0_validity
343 assertValid p_validity
344
345 ret <- commitStatePatch (Versioned p_version p)
346 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
347
348 pure ret
349
350
351 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
352 , HasNodeError err
353 , HasSettings env
354 )
355 => UpdateTableNgramsCharts
356 -> (JobLog -> m ())
357 -> m JobLog
358 tableNgramsPostChartsAsync utn logStatus = do
359 let tabType = utn ^. utn_tab_type
360 let listId = utn ^. utn_list_id
361
362 node <- getNode listId
363 let nId = node ^. node_id
364 _uId = node ^. node_userId
365 mCId = node ^. node_parentId
366
367 printDebug "[tableNgramsPut] tabType" tabType
368 printDebug "[tableNgramsPut] listId" listId
369
370 case mCId of
371 Nothing -> do
372 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
373 pure $ jobLogFail $ jobLogInit 1
374 Just cId -> do
375 case tabType of
376 Authors -> do
377 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
378 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
379 logRef
380 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
381 logRefSuccess
382
383 getRef
384 Institutes -> do
385 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
386 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
387 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
388 logRef
389 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
390 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
391 logRefSuccess
392 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
393 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
394 logRefSuccess
395 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
396 logRefSuccess
397
398 getRef
399 Sources -> do
400 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
401 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
402 logRef
403 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
404 logRefSuccess
405
406 getRef
407 Terms -> do
408 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
409 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
410 logRef
411 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
412 logRefSuccess
413 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
414 logRefSuccess
415 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
416 logRefSuccess
417 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
418 logRefSuccess
419 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
420 logRefSuccess
421 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
422 logRefSuccess
423
424 getRef
425 _ -> do
426 printDebug "[tableNgramsPut] no update for tabType = " tabType
427 pure $ jobLogFail $ jobLogInit 1
428
429 {-
430 { _ne_list :: ListType
431 If we merge the parents/children we can potentially create cycles!
432 , _ne_parent :: Maybe NgramsTerm
433 , _ne_children :: MSet NgramsTerm
434 }
435 -}
436
437 getNgramsTableMap :: RepoCmdM env err m
438 => NodeId
439 -> TableNgrams.NgramsType
440 -> m (Versioned NgramsTableMap)
441 getNgramsTableMap nodeId ngramsType = do
442 v <- view repoVar
443 repo <- liftBase $ readMVar v
444 pure $ Versioned (repo ^. r_version)
445 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
446
447 dumpJsonTableMap :: RepoCmdM env err m
448 => Text
449 -> NodeId
450 -> TableNgrams.NgramsType
451 -> m ()
452 dumpJsonTableMap fpath nodeId ngramsType = do
453 m <- getNgramsTableMap nodeId ngramsType
454 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
455 pure ()
456
457 type MinSize = Int
458 type MaxSize = Int
459
460 -- | TODO Errors management
461 -- TODO: polymorphic for Annuaire or Corpus or ...
462 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
463 -- TODO: should take only one ListId
464
465 getTime' :: MonadBase IO m => m TimeSpec
466 getTime' = liftBase $ getTime ProcessCPUTime
467
468
469 getTableNgrams :: forall env err m.
470 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
471 => NodeType -> NodeId -> TabType
472 -> ListId -> Limit -> Maybe Offset
473 -> Maybe ListType
474 -> Maybe MinSize -> Maybe MaxSize
475 -> Maybe OrderBy
476 -> (NgramsTerm -> Bool)
477 -> m (Versioned NgramsTable)
478 getTableNgrams _nType nId tabType listId limit_ offset
479 listType minSize maxSize orderBy searchQuery = do
480
481 t0 <- getTime'
482 -- lIds <- selectNodesWithUsername NodeList userMaster
483 let
484 ngramsType = ngramsTypeFromTabType tabType
485 offset' = maybe 0 identity offset
486 listType' = maybe (const True) (==) listType
487 minSize' = maybe (const True) (<=) minSize
488 maxSize' = maybe (const True) (>=) maxSize
489
490 selected_node n = minSize' s
491 && maxSize' s
492 && searchQuery (n ^. ne_ngrams)
493 && listType' (n ^. ne_list)
494 where
495 s = n ^. ne_size
496
497 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
498
499 ---------------------------------------
500 sortOnOrder Nothing = identity
501 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
502 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
503 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
504 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
505
506 ---------------------------------------
507 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
508 selectAndPaginate tableMap = roots <> inners
509 where
510 list = tableMap ^.. each
511 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
512 (ne ^. ne_root)
513 selected_nodes = list & take limit_
514 . drop offset'
515 . filter selected_node
516 . sortOnOrder orderBy
517 roots = rootOf <$> selected_nodes
518 rootsSet = Set.fromList (_ne_ngrams <$> roots)
519 inners = list & filter (selected_inner rootsSet)
520
521 ---------------------------------------
522 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
523 setScores False table = pure table
524 setScores True table = do
525 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
526 t1 <- getTime'
527 occurrences <- getOccByNgramsOnlyFast' nId
528 listId
529 ngramsType
530 ngrams_terms
531 t2 <- getTime'
532 liftBase $ hprint stderr
533 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
534 (length ngrams_terms) t1 t2
535 {-
536 occurrences <- getOccByNgramsOnlySlow nType nId
537 (lIds <> [listId])
538 ngramsType
539 ngrams_terms
540 -}
541 let
542 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
543
544 pure $ table & each %~ setOcc
545 ---------------------------------------
546
547 -- lists <- catMaybes <$> listsWith userMaster
548 -- trace (show lists) $
549 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
550
551 let scoresNeeded = needsScores orderBy
552 tableMap1 <- getNgramsTableMap listId ngramsType
553 t1 <- getTime'
554 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
555 . Map.mapWithKey ngramsElementFromRepo
556 t2 <- getTime'
557 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
558 . setScores (not scoresNeeded)
559 . selectAndPaginate
560 t3 <- getTime'
561 liftBase $ hprint stderr
562 ("getTableNgrams total=" % timeSpecs
563 % " map1=" % timeSpecs
564 % " map2=" % timeSpecs
565 % " map3=" % timeSpecs
566 % " sql=" % (if scoresNeeded then "map2" else "map3")
567 % "\n"
568 ) t0 t3 t0 t1 t1 t2 t2 t3
569 pure tableMap3
570
571
572 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
573 scoresRecomputeTableNgrams nId tabType listId = do
574 tableMap <- getNgramsTableMap listId ngramsType
575 _ <- tableMap & v_data %%~ setScores
576 . Map.mapWithKey ngramsElementFromRepo
577
578 pure $ 1
579 where
580 ngramsType = ngramsTypeFromTabType tabType
581
582 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
583 setScores table = do
584 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
585 occurrences <- getOccByNgramsOnlyFast' nId
586 listId
587 ngramsType
588 ngrams_terms
589 let
590 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
591
592 pure $ table & each %~ setOcc
593
594
595
596 -- APIs
597
598 -- TODO: find a better place for the code above, All APIs stay here
599
600 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
601 deriving (Generic, Enum, Bounded, Read, Show)
602
603 instance FromHttpApiData OrderBy
604 where
605 parseUrlPiece "TermAsc" = pure TermAsc
606 parseUrlPiece "TermDesc" = pure TermDesc
607 parseUrlPiece "ScoreAsc" = pure ScoreAsc
608 parseUrlPiece "ScoreDesc" = pure ScoreDesc
609 parseUrlPiece _ = Left "Unexpected value of OrderBy"
610
611
612 instance ToParamSchema OrderBy
613 instance FromJSON OrderBy
614 instance ToJSON OrderBy
615 instance ToSchema OrderBy
616 instance Arbitrary OrderBy
617 where
618 arbitrary = elements [minBound..maxBound]
619
620 needsScores :: Maybe OrderBy -> Bool
621 needsScores (Just ScoreAsc) = True
622 needsScores (Just ScoreDesc) = True
623 needsScores _ = False
624
625 type TableNgramsApiGet = Summary " Table Ngrams API Get"
626 :> QueryParamR "ngramsType" TabType
627 :> QueryParamR "list" ListId
628 :> QueryParamR "limit" Limit
629 :> QueryParam "offset" Offset
630 :> QueryParam "listType" ListType
631 :> QueryParam "minTermSize" MinSize
632 :> QueryParam "maxTermSize" MaxSize
633 :> QueryParam "orderBy" OrderBy
634 :> QueryParam "search" Text
635 :> Get '[JSON] (Versioned NgramsTable)
636
637 type TableNgramsApiPut = Summary " Table Ngrams API Change"
638 :> QueryParamR "ngramsType" TabType
639 :> QueryParamR "list" ListId
640 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
641 :> Put '[JSON] (Versioned NgramsTablePatch)
642
643 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
644 :> QueryParamR "ngramsType" TabType
645 :> QueryParamR "list" ListId
646 :> "recompute" :> Post '[JSON] Int
647
648 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
649 :> QueryParamR "ngramsType" TabType
650 :> QueryParamR "list" ListId
651 :> Get '[JSON] Version
652
653 type TableNgramsApi = TableNgramsApiGet
654 :<|> TableNgramsApiPut
655 :<|> RecomputeScoresNgramsApiGet
656 :<|> "version" :> TableNgramsApiGetVersion
657 :<|> TableNgramsAsyncApi
658
659 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
660 :> "async"
661 :> "charts"
662 :> "update"
663 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
664
665 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
666 => NodeId
667 -> TabType
668 -> ListId
669 -> Limit
670 -> Maybe Offset
671 -> Maybe ListType
672 -> Maybe MinSize -> Maybe MaxSize
673 -> Maybe OrderBy
674 -> Maybe Text -- full text search
675 -> m (Versioned NgramsTable)
676 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
677 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
678 where
679 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
680
681 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
682 => NodeId
683 -> TabType
684 -> ListId
685 -> m Version
686 getTableNgramsVersion _nId _tabType _listId = currentVersion
687 -- TODO: limit?
688 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
689 -- This line above looks like a waste of computation to finally get only the version.
690 -- See the comment about listNgramsChangedSince.
691
692
693 -- | Text search is deactivated for now for ngrams by doc only
694 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
695 => DocId -> TabType
696 -> ListId -> Limit -> Maybe Offset
697 -> Maybe ListType
698 -> Maybe MinSize -> Maybe MaxSize
699 -> Maybe OrderBy
700 -> Maybe Text -- full text search
701 -> m (Versioned NgramsTable)
702 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
703 ns <- selectNodesWithUsername NodeList userMaster
704 let ngramsType = ngramsTypeFromTabType tabType
705 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
706 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
707 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
708
709
710
711 apiNgramsTableCorpus :: ( GargServerC env err m
712 )
713 => NodeId -> ServerT TableNgramsApi m
714 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
715 :<|> tableNgramsPut
716 :<|> scoresRecomputeTableNgrams cId
717 :<|> getTableNgramsVersion cId
718 :<|> apiNgramsAsync cId
719
720 apiNgramsTableDoc :: ( GargServerC env err m
721 )
722 => DocId -> ServerT TableNgramsApi m
723 apiNgramsTableDoc dId = getTableNgramsDoc dId
724 :<|> tableNgramsPut
725 :<|> scoresRecomputeTableNgrams dId
726 :<|> getTableNgramsVersion dId
727 :<|> apiNgramsAsync dId
728 -- > index all the corpus accordingly (TODO AD)
729
730 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
731 apiNgramsAsync _dId =
732 serveJobsAPI $
733 JobFunction $ \i log ->
734 let
735 log' x = do
736 printDebug "tableNgramsPostChartsAsync" x
737 liftBase $ log x
738 in tableNgramsPostChartsAsync i log'
739
740 -- Did the given list of ngrams changed since the given version?
741 -- The returned value is versioned boolean value, meaning that one always retrieve the
742 -- latest version.
743 -- If the given version is negative then one simply receive the latest version and True.
744 -- Using this function is more precise than simply comparing the latest version number
745 -- with the local version number. Indeed there might be no change to this particular list
746 -- and still the version number has changed because of other lists.
747 --
748 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
749 -- * currentVersion: good computation, good bandwidth, bad precision.
750 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
751 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
752 listNgramsChangedSince :: RepoCmdM env err m
753 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
754 listNgramsChangedSince listId ngramsType version
755 | version < 0 =
756 Versioned <$> currentVersion <*> pure True
757 | otherwise =
758 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)