]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[Warnings+FEAT] insert for new created ngrams
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TypeOperators #-}
22 {-# LANGUAGE TypeFamilies #-}
23
24 module Gargantext.API.Ngrams
25 ( TableNgramsApi
26 , TableNgramsApiGet
27 , TableNgramsApiPut
28
29 , getTableNgrams
30 , setListNgrams
31 --, rmListNgrams TODO fix before exporting
32 , apiNgramsTableCorpus
33 , apiNgramsTableDoc
34
35 , NgramsStatePatch
36 , NgramsTablePatch
37 , NgramsTableMap
38
39 , NgramsTerm(..)
40
41 , NgramsElement(..)
42 , mkNgramsElement
43
44 , RootParent(..)
45
46 , MSet
47 , mSetFromList
48 , mSetToList
49
50 , Repo(..)
51 , r_version
52 , r_state
53 , r_history
54 , NgramsRepo
55 , NgramsRepoElement(..)
56 , saveRepo
57 , initRepo
58
59 , RepoEnv(..)
60 , renv_var
61 , renv_lock
62
63 , TabType(..)
64
65 , HasRepoVar(..)
66 , HasRepoSaver(..)
67 , HasRepo(..)
68 , RepoCmdM
69 , QueryParamR
70 , TODO
71
72 -- Internals
73 , getNgramsTableMap
74 , dumpJsonTableMap
75 , tableNgramsPull
76 , tableNgramsPut
77
78 , Version
79 , Versioned(..)
80 , currentVersion
81 , listNgramsChangedSince
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import qualified Data.Aeson.Text as DAT
90 import Data.Either (Either(..))
91 import Data.Foldable
92 import qualified Data.List as List
93 import Data.Map.Strict (Map)
94 import qualified Data.Map.Strict as Map
95 import qualified Data.Map.Strict.Patch as PM
96 import Data.Maybe (fromMaybe)
97 import Data.Monoid
98 import Data.Ord (Down(..))
99 import Data.Patch.Class (Action(act), Transformable(..), ours)
100 import qualified Data.Set as S
101 import qualified Data.Set as Set
102 import Data.Swagger hiding (version, patch)
103 import Data.Text (Text, isInfixOf, unpack)
104 import Data.Text.Lazy.IO as DTL
105 import Formatting (hprint, int, (%))
106 import Formatting.Clock (timeSpecs)
107 import GHC.Generics (Generic)
108 import Servant hiding (Patch)
109 import System.Clock (getTime, TimeSpec, Clock(..))
110 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
111 import System.IO (stderr)
112 import Test.QuickCheck (elements)
113 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
114
115 import Prelude (error)
116 import Gargantext.Prelude hiding (log)
117
118 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
119 import Gargantext.API.Admin.Types (HasSettings)
120 import qualified Gargantext.API.Metrics as Metrics
121 import Gargantext.API.Ngrams.Types
122 import Gargantext.API.Prelude
123 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
124 import Gargantext.Core.Utils (something)
125 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
126 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
127 import Gargantext.Database.Action.Flow.Types
128 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
129 import Gargantext.Database.Admin.Config (userMaster)
130 import Gargantext.Database.Admin.Types.Node (NodeType(..))
131 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
132 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
133 import Gargantext.Database.Query.Table.Node.Select
134 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136 import Gargantext.Database.Query.Table.Node (getNode)
137 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
138 import Gargantext.Prelude.Job
139
140 {-
141 -- TODO sequences of modifications (Patchs)
142 type NgramsIdPatch = Patch NgramsId NgramsPatch
143
144 ngramsPatch :: Int -> NgramsPatch
145 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
146
147 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
148 toEdit n p = Edit n p
149 ngramsIdPatch :: Patch NgramsId NgramsPatch
150 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
151 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
152 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
153 ]
154
155 -- applyPatchBack :: Patch -> IO Patch
156 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
157 -}
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
160 ------------------------------------------------------------------------
161
162 {-
163 -- TODO: Replace.old is ignored which means that if the current list
164 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
165 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
166 -- However this should not happen in non conflicting situations.
167 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
168 mkListsUpdate nt patches =
169 [ (ngramsTypeId nt, ng, listTypeId lt)
170 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
171 , lt <- patch ^.. patch_list . new
172 ]
173
174 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
175 -> NgramsType
176 -> NgramsTablePatch
177 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
178 mkChildrenGroups addOrRem nt patches =
179 [ (ngramsTypeId nt, parent, child)
180 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
181 , child <- patch ^.. patch_children . to addOrRem . folded
182 ]
183 -}
184
185 ------------------------------------------------------------------------
186
187 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
188 => m ()
189 saveRepo = liftBase =<< view repoSaver
190
191 listTypeConflictResolution :: ListType -> ListType -> ListType
192 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
193
194 ngramsStatePatchConflictResolution
195 :: TableNgrams.NgramsType
196 -> NodeId
197 -> NgramsTerm
198 -> ConflictResolutionNgramsPatch
199 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
200 = (ours, (const ours, ours), (False, False))
201 -- (False, False) mean here that Mod has always priority.
202 -- (True, False) <- would mean priority to the left (same as ours).
203
204 -- undefined {- TODO think this through -}, listTypeConflictResolution)
205
206 -- Current state:
207 -- Insertions are not considered as patches,
208 -- they do not extend history,
209 -- they do not bump version.
210 insertNewOnly :: a -> Maybe b -> a
211 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
212 -- TODO error handling
213
214 {- unused
215 -- TODO refactor with putListNgrams
216 copyListNgrams :: RepoCmdM env err m
217 => NodeId -> NodeId -> NgramsType
218 -> m ()
219 copyListNgrams srcListId dstListId ngramsType = do
220 var <- view repoVar
221 liftBase $ modifyMVar_ var $
222 pure . (r_state . at ngramsType %~ (Just . f . something))
223 saveRepo
224 where
225 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
226 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
227
228 -- TODO refactor with putListNgrams
229 -- The list must be non-empty!
230 -- The added ngrams must be non-existent!
231 addListNgrams :: RepoCmdM env err m
232 => NodeId -> NgramsType
233 -> [NgramsElement] -> m ()
234 addListNgrams listId ngramsType nes = do
235 var <- view repoVar
236 liftBase $ modifyMVar_ var $
237 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
238 saveRepo
239 where
240 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
241 -}
242
243 -- UNSAFE
244 rmListNgrams :: RepoCmdM env err m
245 => ListId
246 -> TableNgrams.NgramsType
247 -> m ()
248 rmListNgrams l nt = setListNgrams l nt mempty
249
250 -- | TODO: incr the Version number
251 -- && should use patch
252 -- UNSAFE
253 setListNgrams :: RepoCmdM env err m
254 => NodeId
255 -> TableNgrams.NgramsType
256 -> Map NgramsTerm NgramsRepoElement
257 -> m ()
258 setListNgrams listId ngramsType ns = do
259 var <- view repoVar
260 liftBase $ modifyMVar_ var $
261 pure . ( r_state
262 . at ngramsType %~
263 (Just .
264 (at listId .~ ( Just ns))
265 . something
266 )
267 )
268 saveRepo
269
270
271 currentVersion :: RepoCmdM env err m
272 => m Version
273 currentVersion = do
274 var <- view repoVar
275 r <- liftBase $ readMVar var
276 pure $ r ^. r_version
277
278 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
279 newNgramsFromNgramsStatePatch p =
280 [ text2ngrams (unNgramsTerm n)
281 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
282 , _ <- np ^.. patch_new . _Just
283 ]
284
285 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
286 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
287 commitStatePatch (Versioned p_version p) = do
288 var <- view repoVar
289 vq' <- liftBase $ modifyMVar var $ \r -> do
290 let
291 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
292 (p', q') = transformWith ngramsStatePatchConflictResolution p q
293 r' = r & r_version +~ 1
294 & r_state %~ act p'
295 & r_history %~ (p' :)
296 {-
297 -- Ideally we would like to check these properties. However:
298 -- * They should be checked only to debug the code. The client data
299 -- should be able to trigger these.
300 -- * What kind of error should they throw (we are in IO here)?
301 -- * Should we keep modifyMVar?
302 -- * Should we throw the validation in an Exception, catch it around
303 -- modifyMVar and throw it back as an Error?
304 assertValid $ transformable p q
305 assertValid $ applicable p' (r ^. r_state)
306 -}
307 pure (r', Versioned (r' ^. r_version) q')
308
309 saveRepo
310
311 -- Save new ngrams
312 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
313
314 pure vq'
315
316 -- This is a special case of tableNgramsPut where the input patch is empty.
317 tableNgramsPull :: RepoCmdM env err m
318 => ListId
319 -> TableNgrams.NgramsType
320 -> Version
321 -> m (Versioned NgramsTablePatch)
322 tableNgramsPull listId ngramsType p_version = do
323 var <- view repoVar
324 r <- liftBase $ readMVar var
325
326 let
327 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
328 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
329
330 pure (Versioned (r ^. r_version) q_table)
331
332 -- Apply the given patch to the DB and returns the patch to be applied on the
333 -- client.
334 -- TODO-ACCESS check
335 tableNgramsPut :: ( FlowCmdM env err m
336 , HasSettings env
337 )
338 => TabType
339 -> ListId
340 -> Versioned NgramsTablePatch
341 -> m (Versioned NgramsTablePatch)
342 tableNgramsPut tabType listId (Versioned p_version p_table)
343 | p_table == mempty = do
344 let ngramsType = ngramsTypeFromTabType tabType
345 tableNgramsPull listId ngramsType p_version
346
347 | otherwise = do
348 let ngramsType = ngramsTypeFromTabType tabType
349 (p0, p0_validity) = PM.singleton listId p_table
350 (p, p_validity) = PM.singleton ngramsType p0
351
352 assertValid p0_validity
353 assertValid p_validity
354
355 ret <- commitStatePatch (Versioned p_version p)
356 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
357
358 pure ret
359
360
361 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
362 , HasNodeError err
363 , HasSettings env
364 )
365 => UpdateTableNgramsCharts
366 -> (JobLog -> m ())
367 -> m JobLog
368 tableNgramsPostChartsAsync utn logStatus = do
369 let tabType = utn ^. utn_tab_type
370 let listId = utn ^. utn_list_id
371
372 node <- getNode listId
373 let nId = node ^. node_id
374 _uId = node ^. node_userId
375 mCId = node ^. node_parentId
376
377 printDebug "[tableNgramsPut] tabType" tabType
378 printDebug "[tableNgramsPut] listId" listId
379
380 case mCId of
381 Nothing -> do
382 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
383 pure $ jobLogFail $ jobLogInit 1
384 Just cId -> do
385 case tabType of
386 Authors -> do
387 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
388 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
389 logRef
390 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
391 logRefSuccess
392
393 getRef
394 Institutes -> do
395 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
396 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
397 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
398 logRef
399 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
400 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
401 logRefSuccess
402 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
403 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
404 logRefSuccess
405 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
406 logRefSuccess
407
408 getRef
409 Sources -> do
410 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
411 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
412 logRef
413 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
414 logRefSuccess
415
416 getRef
417 Terms -> do
418 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
419 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
420 logRef
421 {-
422 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
423 logRefSuccess
424 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
425 logRefSuccess
426 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
427 logRefSuccess
428 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
429 logRefSuccess
430 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
431 logRefSuccess
432 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
433 -}
434 logRefSuccess
435
436 getRef
437 _ -> do
438 printDebug "[tableNgramsPut] no update for tabType = " tabType
439 pure $ jobLogFail $ jobLogInit 1
440
441 {-
442 { _ne_list :: ListType
443 If we merge the parents/children we can potentially create cycles!
444 , _ne_parent :: Maybe NgramsTerm
445 , _ne_children :: MSet NgramsTerm
446 }
447 -}
448
449 getNgramsTableMap :: RepoCmdM env err m
450 => NodeId
451 -> TableNgrams.NgramsType
452 -> m (Versioned NgramsTableMap)
453 getNgramsTableMap nodeId ngramsType = do
454 v <- view repoVar
455 repo <- liftBase $ readMVar v
456 pure $ Versioned (repo ^. r_version)
457 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
458
459 dumpJsonTableMap :: RepoCmdM env err m
460 => Text
461 -> NodeId
462 -> TableNgrams.NgramsType
463 -> m ()
464 dumpJsonTableMap fpath nodeId ngramsType = do
465 m <- getNgramsTableMap nodeId ngramsType
466 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
467 pure ()
468
469 type MinSize = Int
470 type MaxSize = Int
471
472 -- | TODO Errors management
473 -- TODO: polymorphic for Annuaire or Corpus or ...
474 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
475 -- TODO: should take only one ListId
476
477 getTime' :: MonadBase IO m => m TimeSpec
478 getTime' = liftBase $ getTime ProcessCPUTime
479
480
481 getTableNgrams :: forall env err m.
482 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
483 => NodeType -> NodeId -> TabType
484 -> ListId -> Limit -> Maybe Offset
485 -> Maybe ListType
486 -> Maybe MinSize -> Maybe MaxSize
487 -> Maybe OrderBy
488 -> (NgramsTerm -> Bool)
489 -> m (Versioned NgramsTable)
490 getTableNgrams _nType nId tabType listId limit_ offset
491 listType minSize maxSize orderBy searchQuery = do
492
493 t0 <- getTime'
494 -- lIds <- selectNodesWithUsername NodeList userMaster
495 let
496 ngramsType = ngramsTypeFromTabType tabType
497 offset' = maybe 0 identity offset
498 listType' = maybe (const True) (==) listType
499 minSize' = maybe (const True) (<=) minSize
500 maxSize' = maybe (const True) (>=) maxSize
501
502 selected_node n = minSize' s
503 && maxSize' s
504 && searchQuery (n ^. ne_ngrams)
505 && listType' (n ^. ne_list)
506 where
507 s = n ^. ne_size
508
509 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
510
511 ---------------------------------------
512 sortOnOrder Nothing = identity
513 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
514 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
515 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
516 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
517
518 ---------------------------------------
519 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
520 selectAndPaginate tableMap = roots <> inners
521 where
522 list = tableMap ^.. each
523 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
524 (ne ^. ne_root)
525 selected_nodes = list & take limit_
526 . drop offset'
527 . filter selected_node
528 . sortOnOrder orderBy
529 roots = rootOf <$> selected_nodes
530 rootsSet = Set.fromList (_ne_ngrams <$> roots)
531 inners = list & filter (selected_inner rootsSet)
532
533 ---------------------------------------
534 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
535 setScores False table = pure table
536 setScores True table = do
537 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
538 t1 <- getTime'
539 occurrences <- getOccByNgramsOnlyFast' nId
540 listId
541 ngramsType
542 ngrams_terms
543 t2 <- getTime'
544 liftBase $ hprint stderr
545 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
546 (length ngrams_terms) t1 t2
547 {-
548 occurrences <- getOccByNgramsOnlySlow nType nId
549 (lIds <> [listId])
550 ngramsType
551 ngrams_terms
552 -}
553 let
554 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
555
556 pure $ table & each %~ setOcc
557 ---------------------------------------
558
559 -- lists <- catMaybes <$> listsWith userMaster
560 -- trace (show lists) $
561 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
562
563 let scoresNeeded = needsScores orderBy
564 tableMap1 <- getNgramsTableMap listId ngramsType
565 t1 <- getTime'
566 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
567 . Map.mapWithKey ngramsElementFromRepo
568 t2 <- getTime'
569 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
570 . setScores (not scoresNeeded)
571 . selectAndPaginate
572 t3 <- getTime'
573 liftBase $ hprint stderr
574 ("getTableNgrams total=" % timeSpecs
575 % " map1=" % timeSpecs
576 % " map2=" % timeSpecs
577 % " map3=" % timeSpecs
578 % " sql=" % (if scoresNeeded then "map2" else "map3")
579 % "\n"
580 ) t0 t3 t0 t1 t1 t2 t2 t3
581 pure tableMap3
582
583
584 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
585 scoresRecomputeTableNgrams nId tabType listId = do
586 tableMap <- getNgramsTableMap listId ngramsType
587 _ <- tableMap & v_data %%~ setScores
588 . Map.mapWithKey ngramsElementFromRepo
589
590 pure $ 1
591 where
592 ngramsType = ngramsTypeFromTabType tabType
593
594 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
595 setScores table = do
596 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
597 occurrences <- getOccByNgramsOnlyFast' nId
598 listId
599 ngramsType
600 ngrams_terms
601 let
602 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
603
604 pure $ table & each %~ setOcc
605
606
607
608 -- APIs
609
610 -- TODO: find a better place for the code above, All APIs stay here
611
612 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
613 deriving (Generic, Enum, Bounded, Read, Show)
614
615 instance FromHttpApiData OrderBy
616 where
617 parseUrlPiece "TermAsc" = pure TermAsc
618 parseUrlPiece "TermDesc" = pure TermDesc
619 parseUrlPiece "ScoreAsc" = pure ScoreAsc
620 parseUrlPiece "ScoreDesc" = pure ScoreDesc
621 parseUrlPiece _ = Left "Unexpected value of OrderBy"
622
623
624 instance ToParamSchema OrderBy
625 instance FromJSON OrderBy
626 instance ToJSON OrderBy
627 instance ToSchema OrderBy
628 instance Arbitrary OrderBy
629 where
630 arbitrary = elements [minBound..maxBound]
631
632 needsScores :: Maybe OrderBy -> Bool
633 needsScores (Just ScoreAsc) = True
634 needsScores (Just ScoreDesc) = True
635 needsScores _ = False
636
637 type TableNgramsApiGet = Summary " Table Ngrams API Get"
638 :> QueryParamR "ngramsType" TabType
639 :> QueryParamR "list" ListId
640 :> QueryParamR "limit" Limit
641 :> QueryParam "offset" Offset
642 :> QueryParam "listType" ListType
643 :> QueryParam "minTermSize" MinSize
644 :> QueryParam "maxTermSize" MaxSize
645 :> QueryParam "orderBy" OrderBy
646 :> QueryParam "search" Text
647 :> Get '[JSON] (Versioned NgramsTable)
648
649 type TableNgramsApiPut = Summary " Table Ngrams API Change"
650 :> QueryParamR "ngramsType" TabType
651 :> QueryParamR "list" ListId
652 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
653 :> Put '[JSON] (Versioned NgramsTablePatch)
654
655 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
656 :> QueryParamR "ngramsType" TabType
657 :> QueryParamR "list" ListId
658 :> "recompute" :> Post '[JSON] Int
659
660 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
661 :> QueryParamR "ngramsType" TabType
662 :> QueryParamR "list" ListId
663 :> Get '[JSON] Version
664
665 type TableNgramsApi = TableNgramsApiGet
666 :<|> TableNgramsApiPut
667 :<|> RecomputeScoresNgramsApiGet
668 :<|> "version" :> TableNgramsApiGetVersion
669 :<|> TableNgramsAsyncApi
670
671 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
672 :> "async"
673 :> "charts"
674 :> "update"
675 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
676
677 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
678 => NodeId
679 -> TabType
680 -> ListId
681 -> Limit
682 -> Maybe Offset
683 -> Maybe ListType
684 -> Maybe MinSize -> Maybe MaxSize
685 -> Maybe OrderBy
686 -> Maybe Text -- full text search
687 -> m (Versioned NgramsTable)
688 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
689 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
690 where
691 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
692
693 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
694 => NodeId
695 -> TabType
696 -> ListId
697 -> m Version
698 getTableNgramsVersion _nId _tabType _listId = currentVersion
699 -- TODO: limit?
700 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
701 -- This line above looks like a waste of computation to finally get only the version.
702 -- See the comment about listNgramsChangedSince.
703
704
705 -- | Text search is deactivated for now for ngrams by doc only
706 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
707 => DocId -> TabType
708 -> ListId -> Limit -> Maybe Offset
709 -> Maybe ListType
710 -> Maybe MinSize -> Maybe MaxSize
711 -> Maybe OrderBy
712 -> Maybe Text -- full text search
713 -> m (Versioned NgramsTable)
714 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
715 ns <- selectNodesWithUsername NodeList userMaster
716 let ngramsType = ngramsTypeFromTabType tabType
717 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
718 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
719 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
720
721
722
723 apiNgramsTableCorpus :: ( GargServerC env err m
724 )
725 => NodeId -> ServerT TableNgramsApi m
726 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
727 :<|> tableNgramsPut
728 :<|> scoresRecomputeTableNgrams cId
729 :<|> getTableNgramsVersion cId
730 :<|> apiNgramsAsync cId
731
732 apiNgramsTableDoc :: ( GargServerC env err m
733 )
734 => DocId -> ServerT TableNgramsApi m
735 apiNgramsTableDoc dId = getTableNgramsDoc dId
736 :<|> tableNgramsPut
737 :<|> scoresRecomputeTableNgrams dId
738 :<|> getTableNgramsVersion dId
739 :<|> apiNgramsAsync dId
740 -- > index all the corpus accordingly (TODO AD)
741
742 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
743 apiNgramsAsync _dId =
744 serveJobsAPI $
745 JobFunction $ \i log ->
746 let
747 log' x = do
748 printDebug "tableNgramsPostChartsAsync" x
749 liftBase $ log x
750 in tableNgramsPostChartsAsync i log'
751
752 -- Did the given list of ngrams changed since the given version?
753 -- The returned value is versioned boolean value, meaning that one always retrieve the
754 -- latest version.
755 -- If the given version is negative then one simply receive the latest version and True.
756 -- Using this function is more precise than simply comparing the latest version number
757 -- with the local version number. Indeed there might be no change to this particular list
758 -- and still the version number has changed because of other lists.
759 --
760 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
761 -- * currentVersion: good computation, good bandwidth, bad precision.
762 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
763 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
764 listNgramsChangedSince :: RepoCmdM env err m
765 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
766 listNgramsChangedSince listId ngramsType version
767 | version < 0 =
768 Versioned <$> currentVersion <*> pure True
769 | otherwise =
770 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)