]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , putListNgrams
34 --, putListNgrams'
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsStatePatch
39 , NgramsTablePatch
40 , NgramsTableMap
41
42 , NgramsTerm(..)
43
44 , NgramsElement(..)
45 , mkNgramsElement
46
47 , RootParent(..)
48
49 , MSet
50 , mSetFromList
51 , mSetToList
52
53 , Repo(..)
54 , r_version
55 , r_state
56 , r_history
57 , NgramsRepo
58 , NgramsRepoElement(..)
59 , saveRepo
60 , initRepo
61
62 , RepoEnv(..)
63 , renv_var
64 , renv_lock
65
66 , TabType(..)
67 , ngramsTypeFromTabType
68
69 , HasRepoVar(..)
70 , HasRepoSaver(..)
71 , HasRepo(..)
72 , RepoCmdM
73 , QueryParamR
74 , TODO
75
76 -- Internals
77 , getNgramsTableMap
78 , dumpJsonTableMap
79 , tableNgramsPull
80 , tableNgramsPut
81
82 , Version
83 , Versioned(..)
84 , currentVersion
85 , listNgramsChangedSince
86 )
87 where
88
89 import Control.Concurrent
90 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
91 import Control.Monad.Reader
92 import Data.Aeson hiding ((.=))
93 import qualified Data.Aeson.Text as DAT
94 import Data.Either (Either(..))
95 import Data.Foldable
96 import qualified Data.List as List
97 import Data.Map.Strict (Map)
98 import qualified Data.Map.Strict as Map
99 import qualified Data.Map.Strict.Patch as PM
100 import Data.Maybe (fromMaybe)
101 import Data.Monoid
102 import Data.Ord (Down(..))
103 import Data.Patch.Class (Action(act), Transformable(..), ours)
104 import qualified Data.Set as S
105 import qualified Data.Set as Set
106 import Data.Swagger hiding (version, patch)
107 import Data.Text (Text, isInfixOf, unpack)
108 import Data.Text.Lazy.IO as DTL
109 import Formatting (hprint, int, (%))
110 import Formatting.Clock (timeSpecs)
111 import GHC.Generics (Generic)
112 import Servant hiding (Patch)
113 import System.Clock (getTime, TimeSpec, Clock(..))
114 import System.IO (stderr)
115 import Test.QuickCheck (elements)
116 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
117
118 import Prelude (error)
119 import Gargantext.Prelude
120
121 import Gargantext.API.Admin.Types (HasSettings)
122 import Gargantext.API.Ngrams.Types
123 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
124 import Gargantext.Core.Types (TODO)
125 import Gargantext.Core.Viz.Graph.API (recomputeGraph)
126 import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
127 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
128 import Gargantext.Database.Query.Table.Node.Select
129 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
130 import Gargantext.Database.Admin.Config (userMaster)
131 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
132 import Gargantext.Database.Admin.Types.Node (NodeType(..))
133 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
134 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
135 import Gargantext.Database.Query.Table.Node (getNode)
136 import Gargantext.Database.Schema.Node (NodePoly(..))
137
138 {-
139 -- TODO sequences of modifications (Patchs)
140 type NgramsIdPatch = Patch NgramsId NgramsPatch
141
142 ngramsPatch :: Int -> NgramsPatch
143 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
144
145 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
146 toEdit n p = Edit n p
147 ngramsIdPatch :: Patch NgramsId NgramsPatch
148 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
149 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
150 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
151 ]
152
153 -- applyPatchBack :: Patch -> IO Patch
154 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
155 -}
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
159
160 {-
161 -- TODO: Replace.old is ignored which means that if the current list
162 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
163 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
164 -- However this should not happen in non conflicting situations.
165 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
166 mkListsUpdate nt patches =
167 [ (ngramsTypeId nt, ng, listTypeId lt)
168 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , lt <- patch ^.. patch_list . new
170 ]
171
172 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
173 -> NgramsType
174 -> NgramsTablePatch
175 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
176 mkChildrenGroups addOrRem nt patches =
177 [ (ngramsTypeId nt, parent, child)
178 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
179 , child <- patch ^.. patch_children . to addOrRem . folded
180 ]
181 -}
182
183 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
184 ngramsTypeFromTabType tabType =
185 let lieu = "Garg.API.Ngrams: " :: Text in
186 case tabType of
187 Sources -> TableNgrams.Sources
188 Authors -> TableNgrams.Authors
189 Institutes -> TableNgrams.Institutes
190 Terms -> TableNgrams.NgramsTerms
191 _ -> panic $ lieu <> "No Ngrams for this tab"
192 -- TODO: This `panic` would disapear with custom NgramsType.
193
194 ------------------------------------------------------------------------
195
196 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
197 => m ()
198 saveRepo = liftBase =<< view repoSaver
199
200 listTypeConflictResolution :: ListType -> ListType -> ListType
201 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
202
203 ngramsStatePatchConflictResolution
204 :: TableNgrams.NgramsType
205 -> NodeId
206 -> NgramsTerm
207 -> ConflictResolutionNgramsPatch
208 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
209 = (ours, (const ours, ours), (False, False))
210 -- (False, False) mean here that Mod has always priority.
211 -- (True, False) <- would mean priority to the left (same as ours).
212
213 -- undefined {- TODO think this through -}, listTypeConflictResolution)
214
215 -- Current state:
216 -- Insertions are not considered as patches,
217 -- they do not extend history,
218 -- they do not bump version.
219 insertNewOnly :: a -> Maybe b -> a
220 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
221 -- TODO error handling
222
223 something :: Monoid a => Maybe a -> a
224 something Nothing = mempty
225 something (Just a) = a
226
227 {- unused
228 -- TODO refactor with putListNgrams
229 copyListNgrams :: RepoCmdM env err m
230 => NodeId -> NodeId -> NgramsType
231 -> m ()
232 copyListNgrams srcListId dstListId ngramsType = do
233 var <- view repoVar
234 liftBase $ modifyMVar_ var $
235 pure . (r_state . at ngramsType %~ (Just . f . something))
236 saveRepo
237 where
238 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
239 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
240
241 -- TODO refactor with putListNgrams
242 -- The list must be non-empty!
243 -- The added ngrams must be non-existent!
244 addListNgrams :: RepoCmdM env err m
245 => NodeId -> NgramsType
246 -> [NgramsElement] -> m ()
247 addListNgrams listId ngramsType nes = do
248 var <- view repoVar
249 liftBase $ modifyMVar_ var $
250 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
251 saveRepo
252 where
253 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
254 -}
255
256 -- UNSAFE
257 rmListNgrams :: RepoCmdM env err m
258 => ListId
259 -> TableNgrams.NgramsType
260 -> m ()
261 rmListNgrams l nt = setListNgrams l nt mempty
262
263 -- | TODO: incr the Version number
264 -- && should use patch
265 -- UNSAFE
266 setListNgrams :: RepoCmdM env err m
267 => NodeId
268 -> TableNgrams.NgramsType
269 -> Map NgramsTerm NgramsRepoElement
270 -> m ()
271 setListNgrams listId ngramsType ns = do
272 var <- view repoVar
273 liftBase $ modifyMVar_ var $
274 pure . ( r_state
275 . at ngramsType %~
276 (Just .
277 (at listId .~ ( Just ns))
278 . something
279 )
280 )
281 saveRepo
282
283 -- NOTE
284 -- This is no longer part of the API.
285 -- This function is maintained for its usage in Database.Action.Flow.List.
286 -- If the given list of ngrams elements contains ngrams already in
287 -- the repo, they will be ignored.
288 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
289 => NodeId
290 -> TableNgrams.NgramsType
291 -> [NgramsElement]
292 -> m ()
293 putListNgrams _ _ [] = pure ()
294 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
295 where
296 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
297
298 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
299 => NodeId
300 -> TableNgrams.NgramsType
301 -> Map NgramsTerm NgramsRepoElement
302 -> m ()
303 putListNgrams' nodeId ngramsType ns = do
304 -- printDebug "[putListNgrams'] nodeId" nodeId
305 -- printDebug "[putListNgrams'] ngramsType" ngramsType
306 -- printDebug "[putListNgrams'] ns" ns
307
308 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
309 (p0, p0_validity) = PM.singleton nodeId p1
310 (p, p_validity) = PM.singleton ngramsType p0
311 assertValid p0_validity
312 assertValid p_validity
313 {-
314 -- TODO
315 v <- currentVersion
316 q <- commitStatePatch (Versioned v p)
317 assert empty q
318 -- What if another commit comes in between?
319 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
320 -- The modifyMVar_ would test the patch with applicable first.
321 -- If valid the rest would be atomic and no merge is required.
322 -}
323 var <- view repoVar
324 liftBase $ modifyMVar_ var $ \r -> do
325 pure $ r & r_version +~ 1
326 & r_history %~ (p :)
327 & r_state . at ngramsType %~
328 (Just .
329 (at nodeId %~
330 ( Just
331 . (<> ns)
332 . something
333 )
334 )
335 . something
336 )
337 saveRepo
338
339
340 currentVersion :: RepoCmdM env err m
341 => m Version
342 currentVersion = do
343 var <- view repoVar
344 r <- liftBase $ readMVar var
345 pure $ r ^. r_version
346
347
348 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
349 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
350 commitStatePatch (Versioned p_version p) = do
351 var <- view repoVar
352 vq' <- liftBase $ modifyMVar var $ \r -> do
353 let
354 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
355 (p', q') = transformWith ngramsStatePatchConflictResolution p q
356 r' = r & r_version +~ 1
357 & r_state %~ act p'
358 & r_history %~ (p' :)
359 {-
360 -- Ideally we would like to check these properties. However:
361 -- * They should be checked only to debug the code. The client data
362 -- should be able to trigger these.
363 -- * What kind of error should they throw (we are in IO here)?
364 -- * Should we keep modifyMVar?
365 -- * Should we throw the validation in an Exception, catch it around
366 -- modifyMVar and throw it back as an Error?
367 assertValid $ transformable p q
368 assertValid $ applicable p' (r ^. r_state)
369 -}
370 pure (r', Versioned (r' ^. r_version) q')
371
372 saveRepo
373 pure vq'
374
375 -- This is a special case of tableNgramsPut where the input patch is empty.
376 tableNgramsPull :: RepoCmdM env err m
377 => ListId
378 -> TableNgrams.NgramsType
379 -> Version
380 -> m (Versioned NgramsTablePatch)
381 tableNgramsPull listId ngramsType p_version = do
382 var <- view repoVar
383 r <- liftBase $ readMVar var
384
385 let
386 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
387 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
388
389 pure (Versioned (r ^. r_version) q_table)
390
391 -- Apply the given patch to the DB and returns the patch to be applied on the
392 -- client.
393 -- TODO-ACCESS check
394 tableNgramsPut :: (HasNodeError err,
395 HasInvalidError err,
396 HasConfig env,
397 HasConnectionPool env,
398 HasSettings env,
399 RepoCmdM env err m)
400 => TabType
401 -> ListId
402 -> Versioned NgramsTablePatch
403 -> m (Versioned NgramsTablePatch)
404 tableNgramsPut tabType listId (Versioned p_version p_table)
405 | p_table == mempty = do
406 let ngramsType = ngramsTypeFromTabType tabType
407 tableNgramsPull listId ngramsType p_version
408
409 | otherwise = do
410 let ngramsType = ngramsTypeFromTabType tabType
411 (p0, p0_validity) = PM.singleton listId p_table
412 (p, p_validity) = PM.singleton ngramsType p0
413
414 assertValid p0_validity
415 assertValid p_validity
416
417 ret <- commitStatePatch (Versioned p_version p)
418 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
419
420 node <- getNode listId
421 let nId = _node_id node
422 uId = _node_userId node
423 _ <- recomputeGraph uId nId Conditional
424
425 pure ret
426
427 {-
428 { _ne_list :: ListType
429 If we merge the parents/children we can potentially create cycles!
430 , _ne_parent :: Maybe NgramsTerm
431 , _ne_children :: MSet NgramsTerm
432 }
433 -}
434
435 getNgramsTableMap :: RepoCmdM env err m
436 => NodeId
437 -> TableNgrams.NgramsType
438 -> m (Versioned NgramsTableMap)
439 getNgramsTableMap nodeId ngramsType = do
440 v <- view repoVar
441 repo <- liftBase $ readMVar v
442 pure $ Versioned (repo ^. r_version)
443 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
444
445 dumpJsonTableMap :: RepoCmdM env err m
446 => Text
447 -> NodeId
448 -> TableNgrams.NgramsType
449 -> m ()
450 dumpJsonTableMap fpath nodeId ngramsType = do
451 m <- getNgramsTableMap nodeId ngramsType
452 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
453 pure ()
454
455 type MinSize = Int
456 type MaxSize = Int
457
458 -- | TODO Errors management
459 -- TODO: polymorphic for Annuaire or Corpus or ...
460 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
461 -- TODO: should take only one ListId
462
463 getTime' :: MonadBase IO m => m TimeSpec
464 getTime' = liftBase $ getTime ProcessCPUTime
465
466
467 getTableNgrams :: forall env err m.
468 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
469 => NodeType -> NodeId -> TabType
470 -> ListId -> Limit -> Maybe Offset
471 -> Maybe ListType
472 -> Maybe MinSize -> Maybe MaxSize
473 -> Maybe OrderBy
474 -> (NgramsTerm -> Bool)
475 -> m (Versioned NgramsTable)
476 getTableNgrams _nType nId tabType listId limit_ offset
477 listType minSize maxSize orderBy searchQuery = do
478
479 t0 <- getTime'
480 -- lIds <- selectNodesWithUsername NodeList userMaster
481 let
482 ngramsType = ngramsTypeFromTabType tabType
483 offset' = maybe 0 identity offset
484 listType' = maybe (const True) (==) listType
485 minSize' = maybe (const True) (<=) minSize
486 maxSize' = maybe (const True) (>=) maxSize
487
488 selected_node n = minSize' s
489 && maxSize' s
490 && searchQuery (n ^. ne_ngrams)
491 && listType' (n ^. ne_list)
492 where
493 s = n ^. ne_size
494
495 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
496
497 ---------------------------------------
498 sortOnOrder Nothing = identity
499 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
500 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
501 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
502 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
503
504 ---------------------------------------
505 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
506 selectAndPaginate tableMap = roots <> inners
507 where
508 list = tableMap ^.. each
509 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
510 (ne ^. ne_root)
511 selected_nodes = list & take limit_
512 . drop offset'
513 . filter selected_node
514 . sortOnOrder orderBy
515 roots = rootOf <$> selected_nodes
516 rootsSet = Set.fromList (_ne_ngrams <$> roots)
517 inners = list & filter (selected_inner rootsSet)
518
519 ---------------------------------------
520 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
521 setScores False table = pure table
522 setScores True table = do
523 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
524 t1 <- getTime'
525 occurrences <- getOccByNgramsOnlyFast' nId
526 listId
527 ngramsType
528 ngrams_terms
529 t2 <- getTime'
530 liftBase $ hprint stderr
531 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
532 (length ngrams_terms) t1 t2
533 {-
534 occurrences <- getOccByNgramsOnlySlow nType nId
535 (lIds <> [listId])
536 ngramsType
537 ngrams_terms
538 -}
539 let
540 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
541
542 pure $ table & each %~ setOcc
543 ---------------------------------------
544
545 -- lists <- catMaybes <$> listsWith userMaster
546 -- trace (show lists) $
547 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
548
549 let scoresNeeded = needsScores orderBy
550 tableMap1 <- getNgramsTableMap listId ngramsType
551 t1 <- getTime'
552 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
553 . Map.mapWithKey ngramsElementFromRepo
554 t2 <- getTime'
555 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
556 . setScores (not scoresNeeded)
557 . selectAndPaginate
558 t3 <- getTime'
559 liftBase $ hprint stderr
560 ("getTableNgrams total=" % timeSpecs
561 % " map1=" % timeSpecs
562 % " map2=" % timeSpecs
563 % " map3=" % timeSpecs
564 % " sql=" % (if scoresNeeded then "map2" else "map3")
565 % "\n"
566 ) t0 t3 t0 t1 t1 t2 t2 t3
567 pure tableMap3
568
569
570 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
571 scoresRecomputeTableNgrams nId tabType listId = do
572 tableMap <- getNgramsTableMap listId ngramsType
573 _ <- tableMap & v_data %%~ setScores
574 . Map.mapWithKey ngramsElementFromRepo
575
576 pure $ 1
577 where
578 ngramsType = ngramsTypeFromTabType tabType
579
580 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
581 setScores table = do
582 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
583 occurrences <- getOccByNgramsOnlyFast' nId
584 listId
585 ngramsType
586 ngrams_terms
587 let
588 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
589
590 pure $ table & each %~ setOcc
591
592
593
594 -- APIs
595
596 -- TODO: find a better place for the code above, All APIs stay here
597
598 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
599 deriving (Generic, Enum, Bounded, Read, Show)
600
601 instance FromHttpApiData OrderBy
602 where
603 parseUrlPiece "TermAsc" = pure TermAsc
604 parseUrlPiece "TermDesc" = pure TermDesc
605 parseUrlPiece "ScoreAsc" = pure ScoreAsc
606 parseUrlPiece "ScoreDesc" = pure ScoreDesc
607 parseUrlPiece _ = Left "Unexpected value of OrderBy"
608
609
610 instance ToParamSchema OrderBy
611 instance FromJSON OrderBy
612 instance ToJSON OrderBy
613 instance ToSchema OrderBy
614 instance Arbitrary OrderBy
615 where
616 arbitrary = elements [minBound..maxBound]
617
618 needsScores :: Maybe OrderBy -> Bool
619 needsScores (Just ScoreAsc) = True
620 needsScores (Just ScoreDesc) = True
621 needsScores _ = False
622
623 type TableNgramsApiGet = Summary " Table Ngrams API Get"
624 :> QueryParamR "ngramsType" TabType
625 :> QueryParamR "list" ListId
626 :> QueryParamR "limit" Limit
627 :> QueryParam "offset" Offset
628 :> QueryParam "listType" ListType
629 :> QueryParam "minTermSize" MinSize
630 :> QueryParam "maxTermSize" MaxSize
631 :> QueryParam "orderBy" OrderBy
632 :> QueryParam "search" Text
633 :> Get '[JSON] (Versioned NgramsTable)
634
635 type TableNgramsApiPut = Summary " Table Ngrams API Change"
636 :> QueryParamR "ngramsType" TabType
637 :> QueryParamR "list" ListId
638 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
639 :> Put '[JSON] (Versioned NgramsTablePatch)
640
641 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
642 :> QueryParamR "ngramsType" TabType
643 :> QueryParamR "list" ListId
644 :> "recompute" :> Post '[JSON] Int
645
646 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
647 :> QueryParamR "ngramsType" TabType
648 :> QueryParamR "list" ListId
649 :> Get '[JSON] Version
650
651 type TableNgramsApi = TableNgramsApiGet
652 :<|> TableNgramsApiPut
653 :<|> RecomputeScoresNgramsApiGet
654 :<|> "version" :> TableNgramsApiGetVersion
655
656 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
657 => NodeId
658 -> TabType
659 -> ListId
660 -> Limit
661 -> Maybe Offset
662 -> Maybe ListType
663 -> Maybe MinSize -> Maybe MaxSize
664 -> Maybe OrderBy
665 -> Maybe Text -- full text search
666 -> m (Versioned NgramsTable)
667 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
668 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
669 where
670 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
671
672 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
673 => NodeId
674 -> TabType
675 -> ListId
676 -> m Version
677 getTableNgramsVersion _nId _tabType _listId = currentVersion
678 -- TODO: limit?
679 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
680 -- This line above looks like a waste of computation to finally get only the version.
681 -- See the comment about listNgramsChangedSince.
682
683
684 -- | Text search is deactivated for now for ngrams by doc only
685 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
686 => DocId -> TabType
687 -> ListId -> Limit -> Maybe Offset
688 -> Maybe ListType
689 -> Maybe MinSize -> Maybe MaxSize
690 -> Maybe OrderBy
691 -> Maybe Text -- full text search
692 -> m (Versioned NgramsTable)
693 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
694 ns <- selectNodesWithUsername NodeList userMaster
695 let ngramsType = ngramsTypeFromTabType tabType
696 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
697 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
698 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
699
700
701
702 apiNgramsTableCorpus :: ( RepoCmdM env err m
703 , HasNodeError err
704 , HasInvalidError err
705 , HasConnectionPool env
706 , HasConfig env
707 , HasSettings env
708 )
709 => NodeId -> ServerT TableNgramsApi m
710 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
711 :<|> tableNgramsPut
712 :<|> scoresRecomputeTableNgrams cId
713 :<|> getTableNgramsVersion cId
714
715 apiNgramsTableDoc :: ( RepoCmdM env err m
716 , HasNodeError err
717 , HasInvalidError err
718 , HasConnectionPool env
719 , HasConfig env
720 , HasSettings env
721 )
722 => DocId -> ServerT TableNgramsApi m
723 apiNgramsTableDoc dId = getTableNgramsDoc dId
724 :<|> tableNgramsPut
725 :<|> scoresRecomputeTableNgrams dId
726 :<|> getTableNgramsVersion dId
727 -- > index all the corpus accordingly (TODO AD)
728
729 -- Did the given list of ngrams changed since the given version?
730 -- The returned value is versioned boolean value, meaning that one always retrieve the
731 -- latest version.
732 -- If the given version is negative then one simply receive the latest version and True.
733 -- Using this function is more precise than simply comparing the latest version number
734 -- with the local version number. Indeed there might be no change to this particular list
735 -- and still the version number has changed because of other lists.
736 --
737 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
738 -- * currentVersion: good computation, good bandwidth, bad precision.
739 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
740 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
741 listNgramsChangedSince :: RepoCmdM env err m
742 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
743 listNgramsChangedSince listId ngramsType version
744 | version < 0 =
745 Versioned <$> currentVersion <*> pure True
746 | otherwise =
747 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)