]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[ngrams] recompute chart automatically when ngrams changed
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
25
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , putListNgrams
35 --, putListNgrams'
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsStatePatch
40 , NgramsTablePatch
41 , NgramsTableMap
42
43 , NgramsTerm(..)
44
45 , NgramsElement(..)
46 , mkNgramsElement
47 , mergeNgramsElement
48
49 , RootParent(..)
50
51 , MSet
52 , mSetFromList
53 , mSetToList
54
55 , Repo(..)
56 , r_version
57 , r_state
58 , r_history
59 , NgramsRepo
60 , NgramsRepoElement(..)
61 , saveRepo
62 , initRepo
63
64 , RepoEnv(..)
65 , renv_var
66 , renv_lock
67
68 , TabType(..)
69 , ngramsTypeFromTabType
70
71 , HasRepoVar(..)
72 , HasRepoSaver(..)
73 , HasRepo(..)
74 , RepoCmdM
75 , QueryParamR
76 , TODO
77
78 -- Internals
79 , getNgramsTableMap
80 , dumpJsonTableMap
81 , tableNgramsPull
82 , tableNgramsPut
83
84 , Version
85 , Versioned(..)
86 , currentVersion
87 , listNgramsChangedSince
88 )
89 where
90
91 import Control.Concurrent
92 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
93 import Control.Monad.Reader
94 import Data.Aeson hiding ((.=))
95 import qualified Data.Aeson.Text as DAT
96 import Data.Either (Either(..))
97 import Data.Foldable
98 import qualified Data.List as List
99 import Data.Map.Strict (Map)
100 import qualified Data.Map.Strict as Map
101 import qualified Data.Map.Strict.Patch as PM
102 import Data.Maybe (fromMaybe)
103 import Data.Monoid
104 import Data.Ord (Down(..))
105 import Data.Patch.Class (Action(act), Transformable(..), ours)
106 import qualified Data.Set as S
107 import qualified Data.Set as Set
108 import Data.Swagger hiding (version, patch)
109 import Data.Text (Text, isInfixOf, unpack)
110 import Data.Text.Lazy.IO as DTL
111 import Formatting (hprint, int, (%))
112 import Formatting.Clock (timeSpecs)
113 import GHC.Generics (Generic)
114 import Servant hiding (Patch)
115 import System.Clock (getTime, TimeSpec, Clock(..))
116 import System.IO (stderr)
117 import Test.QuickCheck (elements)
118 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
119
120 import Prelude (error)
121 import Gargantext.Prelude
122
123 import Gargantext.API.Ngrams.Types
124 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
125 import Gargantext.Core.Types (TODO)
126 import Gargantext.Core.Viz.Graph.API (graphRecompute)
127 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
128 import Gargantext.Database.Query.Table.Node.Select
129 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
130 import Gargantext.Database.Admin.Config (userMaster)
131 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
132 import Gargantext.Database.Admin.Types.Node (NodeType(..))
133 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
134 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
135
136 {-
137 -- TODO sequences of modifications (Patchs)
138 type NgramsIdPatch = Patch NgramsId NgramsPatch
139
140 ngramsPatch :: Int -> NgramsPatch
141 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
142
143 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
144 toEdit n p = Edit n p
145 ngramsIdPatch :: Patch NgramsId NgramsPatch
146 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
147 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
148 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
149 ]
150
151 -- applyPatchBack :: Patch -> IO Patch
152 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
153 -}
154 ------------------------------------------------------------------------
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157
158 {-
159 -- TODO: Replace.old is ignored which means that if the current list
160 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
161 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
162 -- However this should not happen in non conflicting situations.
163 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
164 mkListsUpdate nt patches =
165 [ (ngramsTypeId nt, ng, listTypeId lt)
166 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
167 , lt <- patch ^.. patch_list . new
168 ]
169
170 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
171 -> NgramsType
172 -> NgramsTablePatch
173 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
174 mkChildrenGroups addOrRem nt patches =
175 [ (ngramsTypeId nt, parent, child)
176 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
177 , child <- patch ^.. patch_children . to addOrRem . folded
178 ]
179 -}
180
181 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
182 ngramsTypeFromTabType tabType =
183 let lieu = "Garg.API.Ngrams: " :: Text in
184 case tabType of
185 Sources -> TableNgrams.Sources
186 Authors -> TableNgrams.Authors
187 Institutes -> TableNgrams.Institutes
188 Terms -> TableNgrams.NgramsTerms
189 _ -> panic $ lieu <> "No Ngrams for this tab"
190 -- TODO: This `panic` would disapear with custom NgramsType.
191
192 ------------------------------------------------------------------------
193
194 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
195 => m ()
196 saveRepo = liftBase =<< view repoSaver
197
198 listTypeConflictResolution :: ListType -> ListType -> ListType
199 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
200
201 ngramsStatePatchConflictResolution
202 :: TableNgrams.NgramsType
203 -> NodeId
204 -> NgramsTerm
205 -> ConflictResolutionNgramsPatch
206 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
207 = (ours, (const ours, ours), (False, False))
208 -- (False, False) mean here that Mod has always priority.
209 -- (True, False) <- would mean priority to the left (same as ours).
210
211 -- undefined {- TODO think this through -}, listTypeConflictResolution)
212
213 -- Current state:
214 -- Insertions are not considered as patches,
215 -- they do not extend history,
216 -- they do not bump version.
217 insertNewOnly :: a -> Maybe b -> a
218 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
219 -- TODO error handling
220
221 something :: Monoid a => Maybe a -> a
222 something Nothing = mempty
223 something (Just a) = a
224
225 {- unused
226 -- TODO refactor with putListNgrams
227 copyListNgrams :: RepoCmdM env err m
228 => NodeId -> NodeId -> NgramsType
229 -> m ()
230 copyListNgrams srcListId dstListId ngramsType = do
231 var <- view repoVar
232 liftBase $ modifyMVar_ var $
233 pure . (r_state . at ngramsType %~ (Just . f . something))
234 saveRepo
235 where
236 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
237 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
238
239 -- TODO refactor with putListNgrams
240 -- The list must be non-empty!
241 -- The added ngrams must be non-existent!
242 addListNgrams :: RepoCmdM env err m
243 => NodeId -> NgramsType
244 -> [NgramsElement] -> m ()
245 addListNgrams listId ngramsType nes = do
246 var <- view repoVar
247 liftBase $ modifyMVar_ var $
248 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
249 saveRepo
250 where
251 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
252 -}
253
254 -- UNSAFE
255 rmListNgrams :: RepoCmdM env err m
256 => ListId
257 -> TableNgrams.NgramsType
258 -> m ()
259 rmListNgrams l nt = setListNgrams l nt mempty
260
261 -- | TODO: incr the Version number
262 -- && should use patch
263 -- UNSAFE
264 setListNgrams :: RepoCmdM env err m
265 => NodeId
266 -> TableNgrams.NgramsType
267 -> Map NgramsTerm NgramsRepoElement
268 -> m ()
269 setListNgrams listId ngramsType ns = do
270 var <- view repoVar
271 liftBase $ modifyMVar_ var $
272 pure . ( r_state
273 . at ngramsType %~
274 (Just .
275 (at listId .~ ( Just ns))
276 . something
277 )
278 )
279 saveRepo
280
281 -- NOTE
282 -- This is no longer part of the API.
283 -- This function is maintained for its usage in Database.Action.Flow.List.
284 -- If the given list of ngrams elements contains ngrams already in
285 -- the repo, they will be ignored.
286 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
287 => NodeId
288 -> TableNgrams.NgramsType
289 -> [NgramsElement]
290 -> m ()
291 putListNgrams _ _ [] = pure ()
292 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
293 where
294 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
295
296 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
297 => NodeId
298 -> TableNgrams.NgramsType
299 -> Map NgramsTerm NgramsRepoElement
300 -> m ()
301 putListNgrams' nodeId ngramsType ns = do
302 -- printDebug "[putListNgrams'] nodeId" nodeId
303 -- printDebug "[putListNgrams'] ngramsType" ngramsType
304 -- printDebug "[putListNgrams'] ns" ns
305
306 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
307 (p0, p0_validity) = PM.singleton nodeId p1
308 (p, p_validity) = PM.singleton ngramsType p0
309 assertValid p0_validity
310 assertValid p_validity
311 {-
312 -- TODO
313 v <- currentVersion
314 q <- commitStatePatch (Versioned v p)
315 assert empty q
316 -- What if another commit comes in between?
317 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
318 -- The modifyMVar_ would test the patch with applicable first.
319 -- If valid the rest would be atomic and no merge is required.
320 -}
321 var <- view repoVar
322 liftBase $ modifyMVar_ var $ \r -> do
323 pure $ r & r_version +~ 1
324 & r_history %~ (p :)
325 & r_state . at ngramsType %~
326 (Just .
327 (at nodeId %~
328 ( Just
329 . (<> ns)
330 . something
331 )
332 )
333 . something
334 )
335 saveRepo
336
337
338 currentVersion :: RepoCmdM env err m
339 => m Version
340 currentVersion = do
341 var <- view repoVar
342 r <- liftBase $ readMVar var
343 pure $ r ^. r_version
344
345
346 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
347 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
348 commitStatePatch (Versioned p_version p) = do
349 var <- view repoVar
350 vq' <- liftBase $ modifyMVar var $ \r -> do
351 let
352 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
353 (p', q') = transformWith ngramsStatePatchConflictResolution p q
354 r' = r & r_version +~ 1
355 & r_state %~ act p'
356 & r_history %~ (p' :)
357 {-
358 -- Ideally we would like to check these properties. However:
359 -- * They should be checked only to debug the code. The client data
360 -- should be able to trigger these.
361 -- * What kind of error should they throw (we are in IO here)?
362 -- * Should we keep modifyMVar?
363 -- * Should we throw the validation in an Exception, catch it around
364 -- modifyMVar and throw it back as an Error?
365 assertValid $ transformable p q
366 assertValid $ applicable p' (r ^. r_state)
367 -}
368 pure (r', Versioned (r' ^. r_version) q')
369
370 saveRepo
371 pure vq'
372
373 -- This is a special case of tableNgramsPut where the input patch is empty.
374 tableNgramsPull :: RepoCmdM env err m
375 => ListId
376 -> TableNgrams.NgramsType
377 -> Version
378 -> m (Versioned NgramsTablePatch)
379 tableNgramsPull listId ngramsType p_version = do
380 var <- view repoVar
381 r <- liftBase $ readMVar var
382
383 let
384 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
385 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
386
387 pure (Versioned (r ^. r_version) q_table)
388
389 -- Apply the given patch to the DB and returns the patch to be applied on the
390 -- client.
391 -- TODO-ACCESS check
392 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
393 => TabType
394 -> ListId
395 -> Versioned NgramsTablePatch
396 -> m (Versioned NgramsTablePatch)
397 tableNgramsPut tabType listId (Versioned p_version p_table)
398 | p_table == mempty = do
399 let ngramsType = ngramsTypeFromTabType tabType
400 tableNgramsPull listId ngramsType p_version
401
402 | otherwise = do
403 let ngramsType = ngramsTypeFromTabType tabType
404 (p0, p0_validity) = PM.singleton listId p_table
405 (p, p_validity) = PM.singleton ngramsType p0
406
407 assertValid p0_validity
408 assertValid p_validity
409
410 ret <- commitStatePatch (Versioned p_version p)
411 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
412
413 node <- getNodeWith ListId
414 let nId = _node_id node
415 uId = _node_userId node
416 recomputeGraph uId nId Conditional
417
418 pure ret
419
420 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
421 mergeNgramsElement _neOld neNew = neNew
422 {-
423 { _ne_list :: ListType
424 If we merge the parents/children we can potentially create cycles!
425 , _ne_parent :: Maybe NgramsTerm
426 , _ne_children :: MSet NgramsTerm
427 }
428 -}
429
430 getNgramsTableMap :: RepoCmdM env err m
431 => NodeId
432 -> TableNgrams.NgramsType
433 -> m (Versioned NgramsTableMap)
434 getNgramsTableMap nodeId ngramsType = do
435 v <- view repoVar
436 repo <- liftBase $ readMVar v
437 pure $ Versioned (repo ^. r_version)
438 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
439
440 dumpJsonTableMap :: RepoCmdM env err m
441 => Text
442 -> NodeId
443 -> TableNgrams.NgramsType
444 -> m ()
445 dumpJsonTableMap fpath nodeId ngramsType = do
446 m <- getNgramsTableMap nodeId ngramsType
447 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
448 pure ()
449
450 type MinSize = Int
451 type MaxSize = Int
452
453 -- | TODO Errors management
454 -- TODO: polymorphic for Annuaire or Corpus or ...
455 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
456 -- TODO: should take only one ListId
457
458 getTime' :: MonadBase IO m => m TimeSpec
459 getTime' = liftBase $ getTime ProcessCPUTime
460
461
462 getTableNgrams :: forall env err m.
463 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
464 => NodeType -> NodeId -> TabType
465 -> ListId -> Limit -> Maybe Offset
466 -> Maybe ListType
467 -> Maybe MinSize -> Maybe MaxSize
468 -> Maybe OrderBy
469 -> (NgramsTerm -> Bool)
470 -> m (Versioned NgramsTable)
471 getTableNgrams _nType nId tabType listId limit_ offset
472 listType minSize maxSize orderBy searchQuery = do
473
474 t0 <- getTime'
475 -- lIds <- selectNodesWithUsername NodeList userMaster
476 let
477 ngramsType = ngramsTypeFromTabType tabType
478 offset' = maybe 0 identity offset
479 listType' = maybe (const True) (==) listType
480 minSize' = maybe (const True) (<=) minSize
481 maxSize' = maybe (const True) (>=) maxSize
482
483 selected_node n = minSize' s
484 && maxSize' s
485 && searchQuery (n ^. ne_ngrams)
486 && listType' (n ^. ne_list)
487 where
488 s = n ^. ne_size
489
490 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
491
492 ---------------------------------------
493 sortOnOrder Nothing = identity
494 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
495 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
496 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
497 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
498
499 ---------------------------------------
500 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
501 selectAndPaginate tableMap = roots <> inners
502 where
503 list = tableMap ^.. each
504 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
505 (ne ^. ne_root)
506 selected_nodes = list & take limit_
507 . drop offset'
508 . filter selected_node
509 . sortOnOrder orderBy
510 roots = rootOf <$> selected_nodes
511 rootsSet = Set.fromList (_ne_ngrams <$> roots)
512 inners = list & filter (selected_inner rootsSet)
513
514 ---------------------------------------
515 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
516 setScores False table = pure table
517 setScores True table = do
518 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
519 t1 <- getTime'
520 occurrences <- getOccByNgramsOnlyFast' nId
521 listId
522 ngramsType
523 ngrams_terms
524 t2 <- getTime'
525 liftBase $ hprint stderr
526 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
527 (length ngrams_terms) t1 t2
528 {-
529 occurrences <- getOccByNgramsOnlySlow nType nId
530 (lIds <> [listId])
531 ngramsType
532 ngrams_terms
533 -}
534 let
535 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
536
537 pure $ table & each %~ setOcc
538 ---------------------------------------
539
540 -- lists <- catMaybes <$> listsWith userMaster
541 -- trace (show lists) $
542 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
543
544 let scoresNeeded = needsScores orderBy
545 tableMap1 <- getNgramsTableMap listId ngramsType
546 t1 <- getTime'
547 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
548 . Map.mapWithKey ngramsElementFromRepo
549 t2 <- getTime'
550 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
551 . setScores (not scoresNeeded)
552 . selectAndPaginate
553 t3 <- getTime'
554 liftBase $ hprint stderr
555 ("getTableNgrams total=" % timeSpecs
556 % " map1=" % timeSpecs
557 % " map2=" % timeSpecs
558 % " map3=" % timeSpecs
559 % " sql=" % (if scoresNeeded then "map2" else "map3")
560 % "\n"
561 ) t0 t3 t0 t1 t1 t2 t2 t3
562 pure tableMap3
563
564
565 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
566 scoresRecomputeTableNgrams nId tabType listId = do
567 tableMap <- getNgramsTableMap listId ngramsType
568 _ <- tableMap & v_data %%~ setScores
569 . Map.mapWithKey ngramsElementFromRepo
570
571 pure $ 1
572 where
573 ngramsType = ngramsTypeFromTabType tabType
574
575 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
576 setScores table = do
577 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
578 occurrences <- getOccByNgramsOnlyFast' nId
579 listId
580 ngramsType
581 ngrams_terms
582 let
583 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
584
585 pure $ table & each %~ setOcc
586
587
588
589 -- APIs
590
591 -- TODO: find a better place for the code above, All APIs stay here
592 type QueryParamR = QueryParam' '[Required, Strict]
593
594 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
595 deriving (Generic, Enum, Bounded, Read, Show)
596
597 instance FromHttpApiData OrderBy
598 where
599 parseUrlPiece "TermAsc" = pure TermAsc
600 parseUrlPiece "TermDesc" = pure TermDesc
601 parseUrlPiece "ScoreAsc" = pure ScoreAsc
602 parseUrlPiece "ScoreDesc" = pure ScoreDesc
603 parseUrlPiece _ = Left "Unexpected value of OrderBy"
604
605
606 instance ToParamSchema OrderBy
607 instance FromJSON OrderBy
608 instance ToJSON OrderBy
609 instance ToSchema OrderBy
610 instance Arbitrary OrderBy
611 where
612 arbitrary = elements [minBound..maxBound]
613
614 needsScores :: Maybe OrderBy -> Bool
615 needsScores (Just ScoreAsc) = True
616 needsScores (Just ScoreDesc) = True
617 needsScores _ = False
618
619 type TableNgramsApiGet = Summary " Table Ngrams API Get"
620 :> QueryParamR "ngramsType" TabType
621 :> QueryParamR "list" ListId
622 :> QueryParamR "limit" Limit
623 :> QueryParam "offset" Offset
624 :> QueryParam "listType" ListType
625 :> QueryParam "minTermSize" MinSize
626 :> QueryParam "maxTermSize" MaxSize
627 :> QueryParam "orderBy" OrderBy
628 :> QueryParam "search" Text
629 :> Get '[JSON] (Versioned NgramsTable)
630
631 type TableNgramsApiPut = Summary " Table Ngrams API Change"
632 :> QueryParamR "ngramsType" TabType
633 :> QueryParamR "list" ListId
634 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
635 :> Put '[JSON] (Versioned NgramsTablePatch)
636
637 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
638 :> QueryParamR "ngramsType" TabType
639 :> QueryParamR "list" ListId
640 :> "recompute" :> Post '[JSON] Int
641
642 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
643 :> QueryParamR "ngramsType" TabType
644 :> QueryParamR "list" ListId
645 :> Get '[JSON] Version
646
647 type TableNgramsApi = TableNgramsApiGet
648 :<|> TableNgramsApiPut
649 :<|> RecomputeScoresNgramsApiGet
650 :<|> "version" :> TableNgramsApiGetVersion
651
652 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
653 => NodeId
654 -> TabType
655 -> ListId
656 -> Limit
657 -> Maybe Offset
658 -> Maybe ListType
659 -> Maybe MinSize -> Maybe MaxSize
660 -> Maybe OrderBy
661 -> Maybe Text -- full text search
662 -> m (Versioned NgramsTable)
663 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
664 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
665 where
666 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
667
668 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
669 => NodeId
670 -> TabType
671 -> ListId
672 -> m Version
673 getTableNgramsVersion _nId _tabType _listId = currentVersion
674 -- TODO: limit?
675 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
676 -- This line above looks like a waste of computation to finally get only the version.
677 -- See the comment about listNgramsChangedSince.
678
679
680 -- | Text search is deactivated for now for ngrams by doc only
681 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
682 => DocId -> TabType
683 -> ListId -> Limit -> Maybe Offset
684 -> Maybe ListType
685 -> Maybe MinSize -> Maybe MaxSize
686 -> Maybe OrderBy
687 -> Maybe Text -- full text search
688 -> m (Versioned NgramsTable)
689 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
690 ns <- selectNodesWithUsername NodeList userMaster
691 let ngramsType = ngramsTypeFromTabType tabType
692 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
693 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
694 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
695
696
697
698 apiNgramsTableCorpus :: ( RepoCmdM env err m
699 , HasNodeError err
700 , HasInvalidError err
701 , HasConnectionPool env
702 , HasConfig env
703 )
704 => NodeId -> ServerT TableNgramsApi m
705 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
706 :<|> tableNgramsPut
707 :<|> scoresRecomputeTableNgrams cId
708 :<|> getTableNgramsVersion cId
709
710 apiNgramsTableDoc :: ( RepoCmdM env err m
711 , HasNodeError err
712 , HasInvalidError err
713 , HasConnectionPool env
714 , HasConfig env
715 )
716 => DocId -> ServerT TableNgramsApi m
717 apiNgramsTableDoc dId = getTableNgramsDoc dId
718 :<|> tableNgramsPut
719 :<|> scoresRecomputeTableNgrams dId
720 :<|> getTableNgramsVersion dId
721 -- > index all the corpus accordingly (TODO AD)
722
723 -- Did the given list of ngrams changed since the given version?
724 -- The returned value is versioned boolean value, meaning that one always retrieve the
725 -- latest version.
726 -- If the given version is negative then one simply receive the latest version and True.
727 -- Using this function is more precise than simply comparing the latest version number
728 -- with the local version number. Indeed there might be no change to this particular list
729 -- and still the version number has changed because of other lists.
730 --
731 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
732 -- * currentVersion: good computation, good bandwidth, bad precision.
733 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
734 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
735 listNgramsChangedSince :: RepoCmdM env err m
736 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
737 listNgramsChangedSince listId ngramsType version
738 | version < 0 =
739 Versioned <$> currentVersion <*> pure True
740 | otherwise =
741 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)