]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Remove some unnecssary language pragmas
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TypeOperators #-}
22 {-# LANGUAGE TypeFamilies #-}
23
24 module Gargantext.API.Ngrams
25 ( TableNgramsApi
26 , TableNgramsApiGet
27 , TableNgramsApiPut
28
29 , getTableNgrams
30 , setListNgrams
31 --, rmListNgrams TODO fix before exporting
32 , apiNgramsTableCorpus
33 , apiNgramsTableDoc
34
35 , NgramsStatePatch
36 , NgramsTablePatch
37 , NgramsTableMap
38
39 , NgramsTerm(..)
40
41 , NgramsElement(..)
42 , mkNgramsElement
43
44 , RootParent(..)
45
46 , MSet
47 , mSetFromList
48 , mSetToList
49
50 , Repo(..)
51 , r_version
52 , r_state
53 , r_history
54 , NgramsRepo
55 , NgramsRepoElement(..)
56 , saveRepo
57 , initRepo
58
59 , RepoEnv(..)
60 , renv_var
61 , renv_lock
62
63 , TabType(..)
64
65 , HasRepoVar(..)
66 , HasRepoSaver(..)
67 , HasRepo(..)
68 , RepoCmdM
69 , QueryParamR
70 , TODO
71
72 -- Internals
73 , getNgramsTableMap
74 , dumpJsonTableMap
75 , tableNgramsPull
76 , tableNgramsPut
77
78 , Version
79 , Versioned(..)
80 , currentVersion
81 , listNgramsChangedSince
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import qualified Data.Aeson.Text as DAT
90 import Data.Either (Either(..))
91 import Data.Foldable
92 import qualified Data.List as List
93 import Data.Map.Strict (Map)
94 import qualified Data.Map.Strict as Map
95 import qualified Data.Map.Strict.Patch as PM
96 import Data.Maybe (fromMaybe)
97 import Data.Monoid
98 import Data.Ord (Down(..))
99 import Data.Patch.Class (Action(act), Transformable(..), ours)
100 import qualified Data.Set as S
101 import qualified Data.Set as Set
102 import Data.Swagger hiding (version, patch)
103 import Data.Text (Text, isInfixOf, unpack)
104 import Data.Text.Lazy.IO as DTL
105 import Formatting (hprint, int, (%))
106 import Formatting.Clock (timeSpecs)
107 import GHC.Generics (Generic)
108 import Servant hiding (Patch)
109 import System.Clock (getTime, TimeSpec, Clock(..))
110 import System.IO (stderr)
111 import Test.QuickCheck (elements)
112 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
113
114 import Prelude (error)
115 import Gargantext.Prelude
116
117 import Gargantext.API.Admin.Types (HasSettings)
118 import qualified Gargantext.API.Metrics as Metrics
119 import Gargantext.API.Ngrams.Types
120 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, TODO, assertValid)
121 import Gargantext.Core.Utils (something)
122 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
123 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
124 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
125 import Gargantext.Database.Query.Table.Node.Select
126 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
127 import Gargantext.Database.Admin.Config (userMaster)
128 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
129 import Gargantext.Database.Admin.Types.Node (NodeType(..))
130 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
131 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
132 import Gargantext.Database.Query.Table.Node (getNode)
133 import Gargantext.Database.Query.Tree.Error (HasTreeError)
134 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
135
136 {-
137 -- TODO sequences of modifications (Patchs)
138 type NgramsIdPatch = Patch NgramsId NgramsPatch
139
140 ngramsPatch :: Int -> NgramsPatch
141 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
142
143 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
144 toEdit n p = Edit n p
145 ngramsIdPatch :: Patch NgramsId NgramsPatch
146 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
147 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
148 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
149 ]
150
151 -- applyPatchBack :: Patch -> IO Patch
152 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
153 -}
154 ------------------------------------------------------------------------
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157
158 {-
159 -- TODO: Replace.old is ignored which means that if the current list
160 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
161 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
162 -- However this should not happen in non conflicting situations.
163 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
164 mkListsUpdate nt patches =
165 [ (ngramsTypeId nt, ng, listTypeId lt)
166 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
167 , lt <- patch ^.. patch_list . new
168 ]
169
170 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
171 -> NgramsType
172 -> NgramsTablePatch
173 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
174 mkChildrenGroups addOrRem nt patches =
175 [ (ngramsTypeId nt, parent, child)
176 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
177 , child <- patch ^.. patch_children . to addOrRem . folded
178 ]
179 -}
180
181 ------------------------------------------------------------------------
182
183 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
184 => m ()
185 saveRepo = liftBase =<< view repoSaver
186
187 listTypeConflictResolution :: ListType -> ListType -> ListType
188 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
189
190 ngramsStatePatchConflictResolution
191 :: TableNgrams.NgramsType
192 -> NodeId
193 -> NgramsTerm
194 -> ConflictResolutionNgramsPatch
195 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
196 = (ours, (const ours, ours), (False, False))
197 -- (False, False) mean here that Mod has always priority.
198 -- (True, False) <- would mean priority to the left (same as ours).
199
200 -- undefined {- TODO think this through -}, listTypeConflictResolution)
201
202 -- Current state:
203 -- Insertions are not considered as patches,
204 -- they do not extend history,
205 -- they do not bump version.
206 insertNewOnly :: a -> Maybe b -> a
207 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
208 -- TODO error handling
209
210 {- unused
211 -- TODO refactor with putListNgrams
212 copyListNgrams :: RepoCmdM env err m
213 => NodeId -> NodeId -> NgramsType
214 -> m ()
215 copyListNgrams srcListId dstListId ngramsType = do
216 var <- view repoVar
217 liftBase $ modifyMVar_ var $
218 pure . (r_state . at ngramsType %~ (Just . f . something))
219 saveRepo
220 where
221 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
222 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
223
224 -- TODO refactor with putListNgrams
225 -- The list must be non-empty!
226 -- The added ngrams must be non-existent!
227 addListNgrams :: RepoCmdM env err m
228 => NodeId -> NgramsType
229 -> [NgramsElement] -> m ()
230 addListNgrams listId ngramsType nes = do
231 var <- view repoVar
232 liftBase $ modifyMVar_ var $
233 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
234 saveRepo
235 where
236 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
237 -}
238
239 -- UNSAFE
240 rmListNgrams :: RepoCmdM env err m
241 => ListId
242 -> TableNgrams.NgramsType
243 -> m ()
244 rmListNgrams l nt = setListNgrams l nt mempty
245
246 -- | TODO: incr the Version number
247 -- && should use patch
248 -- UNSAFE
249 setListNgrams :: RepoCmdM env err m
250 => NodeId
251 -> TableNgrams.NgramsType
252 -> Map NgramsTerm NgramsRepoElement
253 -> m ()
254 setListNgrams listId ngramsType ns = do
255 var <- view repoVar
256 liftBase $ modifyMVar_ var $
257 pure . ( r_state
258 . at ngramsType %~
259 (Just .
260 (at listId .~ ( Just ns))
261 . something
262 )
263 )
264 saveRepo
265
266
267 currentVersion :: RepoCmdM env err m
268 => m Version
269 currentVersion = do
270 var <- view repoVar
271 r <- liftBase $ readMVar var
272 pure $ r ^. r_version
273
274
275 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
276 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
277 commitStatePatch (Versioned p_version p) = do
278 var <- view repoVar
279 vq' <- liftBase $ modifyMVar var $ \r -> do
280 let
281 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
282 (p', q') = transformWith ngramsStatePatchConflictResolution p q
283 r' = r & r_version +~ 1
284 & r_state %~ act p'
285 & r_history %~ (p' :)
286 {-
287 -- Ideally we would like to check these properties. However:
288 -- * They should be checked only to debug the code. The client data
289 -- should be able to trigger these.
290 -- * What kind of error should they throw (we are in IO here)?
291 -- * Should we keep modifyMVar?
292 -- * Should we throw the validation in an Exception, catch it around
293 -- modifyMVar and throw it back as an Error?
294 assertValid $ transformable p q
295 assertValid $ applicable p' (r ^. r_state)
296 -}
297 pure (r', Versioned (r' ^. r_version) q')
298
299 saveRepo
300 pure vq'
301
302 -- This is a special case of tableNgramsPut where the input patch is empty.
303 tableNgramsPull :: RepoCmdM env err m
304 => ListId
305 -> TableNgrams.NgramsType
306 -> Version
307 -> m (Versioned NgramsTablePatch)
308 tableNgramsPull listId ngramsType p_version = do
309 var <- view repoVar
310 r <- liftBase $ readMVar var
311
312 let
313 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
314 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
315
316 pure (Versioned (r ^. r_version) q_table)
317
318 -- Apply the given patch to the DB and returns the patch to be applied on the
319 -- client.
320 -- TODO-ACCESS check
321 tableNgramsPut :: ( HasNodeError err
322 , HasTreeError err
323 , HasInvalidError err
324 , HasConfig env
325 , HasConnectionPool env
326 , HasSettings env
327 , RepoCmdM env err m
328 )
329 => TabType
330 -> ListId
331 -> Versioned NgramsTablePatch
332 -> m (Versioned NgramsTablePatch)
333 tableNgramsPut tabType listId (Versioned p_version p_table)
334 | p_table == mempty = do
335 let ngramsType = ngramsTypeFromTabType tabType
336 tableNgramsPull listId ngramsType p_version
337
338 | otherwise = do
339 let ngramsType = ngramsTypeFromTabType tabType
340 (p0, p0_validity) = PM.singleton listId p_table
341 (p, p_validity) = PM.singleton ngramsType p0
342
343 assertValid p0_validity
344 assertValid p_validity
345
346 ret <- commitStatePatch (Versioned p_version p)
347 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
348
349 node <- getNode listId
350 let nId = node ^. node_id
351 _uId = node ^. node_userId
352 mCId = node ^. node_parentId
353 -- printDebug "[tableNgramsPut] updating graph with nId" nId
354 -- printDebug "[tableNgramsPut] updating graph with uId" uId
355 -- _ <- recomputeGraph uId nId Conditional
356
357 printDebug "[tableNgramsPut] tabType" tabType
358 printDebug "[tableNgramsPut] listId" listId
359
360 _ <- case mCId of
361 Nothing -> do
362 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
363 pure ()
364 Just cId -> do
365 case tabType of
366 Authors -> do
367 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
368 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
369 pure ()
370 Institutes -> do
371 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
372 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
373 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
374 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
375 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
376 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
377 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
378 pure ()
379 Sources -> do
380 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
381 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
382 pure ()
383 Terms -> do
384 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
385 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
386 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
387 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
388 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
389 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
390 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
391 pure ()
392 _ -> do
393 printDebug "[tableNgramsPut] no update for tabType = " tabType
394 pure ()
395 pure ()
396 pure ret
397
398 {-
399 { _ne_list :: ListType
400 If we merge the parents/children we can potentially create cycles!
401 , _ne_parent :: Maybe NgramsTerm
402 , _ne_children :: MSet NgramsTerm
403 }
404 -}
405
406 getNgramsTableMap :: RepoCmdM env err m
407 => NodeId
408 -> TableNgrams.NgramsType
409 -> m (Versioned NgramsTableMap)
410 getNgramsTableMap nodeId ngramsType = do
411 v <- view repoVar
412 repo <- liftBase $ readMVar v
413 pure $ Versioned (repo ^. r_version)
414 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
415
416 dumpJsonTableMap :: RepoCmdM env err m
417 => Text
418 -> NodeId
419 -> TableNgrams.NgramsType
420 -> m ()
421 dumpJsonTableMap fpath nodeId ngramsType = do
422 m <- getNgramsTableMap nodeId ngramsType
423 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
424 pure ()
425
426 type MinSize = Int
427 type MaxSize = Int
428
429 -- | TODO Errors management
430 -- TODO: polymorphic for Annuaire or Corpus or ...
431 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
432 -- TODO: should take only one ListId
433
434 getTime' :: MonadBase IO m => m TimeSpec
435 getTime' = liftBase $ getTime ProcessCPUTime
436
437
438 getTableNgrams :: forall env err m.
439 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
440 => NodeType -> NodeId -> TabType
441 -> ListId -> Limit -> Maybe Offset
442 -> Maybe ListType
443 -> Maybe MinSize -> Maybe MaxSize
444 -> Maybe OrderBy
445 -> (NgramsTerm -> Bool)
446 -> m (Versioned NgramsTable)
447 getTableNgrams _nType nId tabType listId limit_ offset
448 listType minSize maxSize orderBy searchQuery = do
449
450 t0 <- getTime'
451 -- lIds <- selectNodesWithUsername NodeList userMaster
452 let
453 ngramsType = ngramsTypeFromTabType tabType
454 offset' = maybe 0 identity offset
455 listType' = maybe (const True) (==) listType
456 minSize' = maybe (const True) (<=) minSize
457 maxSize' = maybe (const True) (>=) maxSize
458
459 selected_node n = minSize' s
460 && maxSize' s
461 && searchQuery (n ^. ne_ngrams)
462 && listType' (n ^. ne_list)
463 where
464 s = n ^. ne_size
465
466 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
467
468 ---------------------------------------
469 sortOnOrder Nothing = identity
470 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
471 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
472 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
473 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
474
475 ---------------------------------------
476 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
477 selectAndPaginate tableMap = roots <> inners
478 where
479 list = tableMap ^.. each
480 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
481 (ne ^. ne_root)
482 selected_nodes = list & take limit_
483 . drop offset'
484 . filter selected_node
485 . sortOnOrder orderBy
486 roots = rootOf <$> selected_nodes
487 rootsSet = Set.fromList (_ne_ngrams <$> roots)
488 inners = list & filter (selected_inner rootsSet)
489
490 ---------------------------------------
491 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
492 setScores False table = pure table
493 setScores True table = do
494 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
495 t1 <- getTime'
496 occurrences <- getOccByNgramsOnlyFast' nId
497 listId
498 ngramsType
499 ngrams_terms
500 t2 <- getTime'
501 liftBase $ hprint stderr
502 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
503 (length ngrams_terms) t1 t2
504 {-
505 occurrences <- getOccByNgramsOnlySlow nType nId
506 (lIds <> [listId])
507 ngramsType
508 ngrams_terms
509 -}
510 let
511 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
512
513 pure $ table & each %~ setOcc
514 ---------------------------------------
515
516 -- lists <- catMaybes <$> listsWith userMaster
517 -- trace (show lists) $
518 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
519
520 let scoresNeeded = needsScores orderBy
521 tableMap1 <- getNgramsTableMap listId ngramsType
522 t1 <- getTime'
523 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
524 . Map.mapWithKey ngramsElementFromRepo
525 t2 <- getTime'
526 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
527 . setScores (not scoresNeeded)
528 . selectAndPaginate
529 t3 <- getTime'
530 liftBase $ hprint stderr
531 ("getTableNgrams total=" % timeSpecs
532 % " map1=" % timeSpecs
533 % " map2=" % timeSpecs
534 % " map3=" % timeSpecs
535 % " sql=" % (if scoresNeeded then "map2" else "map3")
536 % "\n"
537 ) t0 t3 t0 t1 t1 t2 t2 t3
538 pure tableMap3
539
540
541 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
542 scoresRecomputeTableNgrams nId tabType listId = do
543 tableMap <- getNgramsTableMap listId ngramsType
544 _ <- tableMap & v_data %%~ setScores
545 . Map.mapWithKey ngramsElementFromRepo
546
547 pure $ 1
548 where
549 ngramsType = ngramsTypeFromTabType tabType
550
551 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
552 setScores table = do
553 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
554 occurrences <- getOccByNgramsOnlyFast' nId
555 listId
556 ngramsType
557 ngrams_terms
558 let
559 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
560
561 pure $ table & each %~ setOcc
562
563
564
565 -- APIs
566
567 -- TODO: find a better place for the code above, All APIs stay here
568
569 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
570 deriving (Generic, Enum, Bounded, Read, Show)
571
572 instance FromHttpApiData OrderBy
573 where
574 parseUrlPiece "TermAsc" = pure TermAsc
575 parseUrlPiece "TermDesc" = pure TermDesc
576 parseUrlPiece "ScoreAsc" = pure ScoreAsc
577 parseUrlPiece "ScoreDesc" = pure ScoreDesc
578 parseUrlPiece _ = Left "Unexpected value of OrderBy"
579
580
581 instance ToParamSchema OrderBy
582 instance FromJSON OrderBy
583 instance ToJSON OrderBy
584 instance ToSchema OrderBy
585 instance Arbitrary OrderBy
586 where
587 arbitrary = elements [minBound..maxBound]
588
589 needsScores :: Maybe OrderBy -> Bool
590 needsScores (Just ScoreAsc) = True
591 needsScores (Just ScoreDesc) = True
592 needsScores _ = False
593
594 type TableNgramsApiGet = Summary " Table Ngrams API Get"
595 :> QueryParamR "ngramsType" TabType
596 :> QueryParamR "list" ListId
597 :> QueryParamR "limit" Limit
598 :> QueryParam "offset" Offset
599 :> QueryParam "listType" ListType
600 :> QueryParam "minTermSize" MinSize
601 :> QueryParam "maxTermSize" MaxSize
602 :> QueryParam "orderBy" OrderBy
603 :> QueryParam "search" Text
604 :> Get '[JSON] (Versioned NgramsTable)
605
606 type TableNgramsApiPut = Summary " Table Ngrams API Change"
607 :> QueryParamR "ngramsType" TabType
608 :> QueryParamR "list" ListId
609 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
610 :> Put '[JSON] (Versioned NgramsTablePatch)
611
612 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
613 :> QueryParamR "ngramsType" TabType
614 :> QueryParamR "list" ListId
615 :> "recompute" :> Post '[JSON] Int
616
617 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
618 :> QueryParamR "ngramsType" TabType
619 :> QueryParamR "list" ListId
620 :> Get '[JSON] Version
621
622 type TableNgramsApi = TableNgramsApiGet
623 :<|> TableNgramsApiPut
624 :<|> RecomputeScoresNgramsApiGet
625 :<|> "version" :> TableNgramsApiGetVersion
626
627 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
628 => NodeId
629 -> TabType
630 -> ListId
631 -> Limit
632 -> Maybe Offset
633 -> Maybe ListType
634 -> Maybe MinSize -> Maybe MaxSize
635 -> Maybe OrderBy
636 -> Maybe Text -- full text search
637 -> m (Versioned NgramsTable)
638 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
639 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
640 where
641 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
642
643 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
644 => NodeId
645 -> TabType
646 -> ListId
647 -> m Version
648 getTableNgramsVersion _nId _tabType _listId = currentVersion
649 -- TODO: limit?
650 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
651 -- This line above looks like a waste of computation to finally get only the version.
652 -- See the comment about listNgramsChangedSince.
653
654
655 -- | Text search is deactivated for now for ngrams by doc only
656 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
657 => DocId -> TabType
658 -> ListId -> Limit -> Maybe Offset
659 -> Maybe ListType
660 -> Maybe MinSize -> Maybe MaxSize
661 -> Maybe OrderBy
662 -> Maybe Text -- full text search
663 -> m (Versioned NgramsTable)
664 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
665 ns <- selectNodesWithUsername NodeList userMaster
666 let ngramsType = ngramsTypeFromTabType tabType
667 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
668 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
669 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
670
671
672
673 apiNgramsTableCorpus :: ( RepoCmdM env err m
674 , HasNodeError err
675 , HasTreeError err
676 , HasInvalidError err
677 , HasConnectionPool env
678 , HasConfig env
679 , HasSettings env
680 )
681 => NodeId -> ServerT TableNgramsApi m
682 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
683 :<|> tableNgramsPut
684 :<|> scoresRecomputeTableNgrams cId
685 :<|> getTableNgramsVersion cId
686
687 apiNgramsTableDoc :: ( RepoCmdM env err m
688 , HasNodeError err
689 , HasTreeError err
690 , HasInvalidError err
691 , HasConnectionPool env
692 , HasConfig env
693 , HasSettings env
694 )
695 => DocId -> ServerT TableNgramsApi m
696 apiNgramsTableDoc dId = getTableNgramsDoc dId
697 :<|> tableNgramsPut
698 :<|> scoresRecomputeTableNgrams dId
699 :<|> getTableNgramsVersion dId
700 -- > index all the corpus accordingly (TODO AD)
701
702 -- Did the given list of ngrams changed since the given version?
703 -- The returned value is versioned boolean value, meaning that one always retrieve the
704 -- latest version.
705 -- If the given version is negative then one simply receive the latest version and True.
706 -- Using this function is more precise than simply comparing the latest version number
707 -- with the local version number. Indeed there might be no change to this particular list
708 -- and still the version number has changed because of other lists.
709 --
710 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
711 -- * currentVersion: good computation, good bandwidth, bad precision.
712 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
713 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
714 listNgramsChangedSince :: RepoCmdM env err m
715 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
716 listNgramsChangedSince listId ngramsType version
717 | version < 0 =
718 Versioned <$> currentVersion <*> pure True
719 | otherwise =
720 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)