]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[ngrams] add score update endpoint + sorting
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , VersionedWithCount(..)
82 , currentVersion
83 , listNgramsChangedSince
84 )
85 where
86
87 import Control.Concurrent
88 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
89 import Control.Monad.Reader
90 import Data.Aeson hiding ((.=))
91 import qualified Data.Aeson.Text as DAT
92 import Data.Either (Either(..))
93 import Data.Foldable
94 import qualified Data.List as List
95 import Data.Map.Strict (Map)
96 import qualified Data.Map.Strict as Map
97 import qualified Data.Map.Strict.Patch as PM
98 import Data.Maybe (fromMaybe)
99 import Data.Monoid
100 import Data.Ord (Down(..))
101 import Data.Patch.Class (Action(act), Transformable(..), ours)
102 import qualified Data.Set as S
103 import qualified Data.Set as Set
104 import Data.Swagger hiding (version, patch)
105 import Data.Text (Text, isInfixOf, unpack)
106 import Data.Text.Lazy.IO as DTL
107 import Formatting (hprint, int, (%))
108 import Formatting.Clock (timeSpecs)
109 import GHC.Generics (Generic)
110 import Servant hiding (Patch)
111 import System.Clock (getTime, TimeSpec, Clock(..))
112 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
113 import System.IO (stderr)
114 import Test.QuickCheck (elements)
115 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
116
117 import Prelude (error)
118 import Gargantext.Prelude hiding (log)
119
120 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
121 import Gargantext.API.Admin.Types (HasSettings)
122 import qualified Gargantext.API.Metrics as Metrics
123 import Gargantext.API.Ngrams.Types
124 import Gargantext.API.Prelude
125 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
126 import Gargantext.Core.Utils (something)
127 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
128 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
129 import Gargantext.Database.Action.Flow.Types
130 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
131 import Gargantext.Database.Admin.Config (userMaster)
132 import Gargantext.Database.Admin.Types.Node (NodeType(..))
133 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
134 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
135 import Gargantext.Database.Query.Table.Node.Select
136 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
137 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
138 import Gargantext.Database.Query.Table.Node (getNode)
139 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
140 import Gargantext.Prelude.Job
141
142 {-
143 -- TODO sequences of modifications (Patchs)
144 type NgramsIdPatch = Patch NgramsId NgramsPatch
145
146 ngramsPatch :: Int -> NgramsPatch
147 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
148
149 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
150 toEdit n p = Edit n p
151 ngramsIdPatch :: Patch NgramsId NgramsPatch
152 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
153 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
154 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
155 ]
156
157 -- applyPatchBack :: Patch -> IO Patch
158 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
159 -}
160 ------------------------------------------------------------------------
161 ------------------------------------------------------------------------
162 ------------------------------------------------------------------------
163
164 {-
165 -- TODO: Replace.old is ignored which means that if the current list
166 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
167 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
168 -- However this should not happen in non conflicting situations.
169 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
170 mkListsUpdate nt patches =
171 [ (ngramsTypeId nt, ng, listTypeId lt)
172 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , lt <- patch ^.. patch_list . new
174 ]
175
176 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
177 -> NgramsType
178 -> NgramsTablePatch
179 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
180 mkChildrenGroups addOrRem nt patches =
181 [ (ngramsTypeId nt, parent, child)
182 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
183 , child <- patch ^.. patch_children . to addOrRem . folded
184 ]
185 -}
186
187 ------------------------------------------------------------------------
188
189 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
190 => m ()
191 saveRepo = liftBase =<< view repoSaver
192
193 listTypeConflictResolution :: ListType -> ListType -> ListType
194 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
195
196 ngramsStatePatchConflictResolution
197 :: TableNgrams.NgramsType
198 -> NodeId
199 -> NgramsTerm
200 -> ConflictResolutionNgramsPatch
201 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
202 = (ours, (const ours, ours), (False, False))
203 -- (False, False) mean here that Mod has always priority.
204 -- (True, False) <- would mean priority to the left (same as ours).
205
206 -- undefined {- TODO think this through -}, listTypeConflictResolution)
207
208 -- Current state:
209 -- Insertions are not considered as patches,
210 -- they do not extend history,
211 -- they do not bump version.
212 insertNewOnly :: a -> Maybe b -> a
213 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
214 -- TODO error handling
215
216 {- unused
217 -- TODO refactor with putListNgrams
218 copyListNgrams :: RepoCmdM env err m
219 => NodeId -> NodeId -> NgramsType
220 -> m ()
221 copyListNgrams srcListId dstListId ngramsType = do
222 var <- view repoVar
223 liftBase $ modifyMVar_ var $
224 pure . (r_state . at ngramsType %~ (Just . f . something))
225 saveRepo
226 where
227 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
228 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
229
230 -- TODO refactor with putListNgrams
231 -- The list must be non-empty!
232 -- The added ngrams must be non-existent!
233 addListNgrams :: RepoCmdM env err m
234 => NodeId -> NgramsType
235 -> [NgramsElement] -> m ()
236 addListNgrams listId ngramsType nes = do
237 var <- view repoVar
238 liftBase $ modifyMVar_ var $
239 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
240 saveRepo
241 where
242 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
243 -}
244
245 -- UNSAFE
246 rmListNgrams :: RepoCmdM env err m
247 => ListId
248 -> TableNgrams.NgramsType
249 -> m ()
250 rmListNgrams l nt = setListNgrams l nt mempty
251
252 -- | TODO: incr the Version number
253 -- && should use patch
254 -- UNSAFE
255 setListNgrams :: RepoCmdM env err m
256 => NodeId
257 -> TableNgrams.NgramsType
258 -> Map NgramsTerm NgramsRepoElement
259 -> m ()
260 setListNgrams listId ngramsType ns = do
261 var <- view repoVar
262 liftBase $ modifyMVar_ var $
263 pure . ( r_state
264 . at ngramsType %~
265 (Just .
266 (at listId .~ ( Just ns))
267 . something
268 )
269 )
270 saveRepo
271
272
273 currentVersion :: RepoCmdM env err m
274 => m Version
275 currentVersion = do
276 var <- view repoVar
277 r <- liftBase $ readMVar var
278 pure $ r ^. r_version
279
280 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
281 newNgramsFromNgramsStatePatch p =
282 [ text2ngrams (unNgramsTerm n)
283 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
284 , _ <- np ^.. patch_new . _Just
285 ]
286
287 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
288 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
289 commitStatePatch (Versioned p_version p) = do
290 var <- view repoVar
291 vq' <- liftBase $ modifyMVar var $ \r -> do
292 let
293 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
294 (p', q') = transformWith ngramsStatePatchConflictResolution p q
295 r' = r & r_version +~ 1
296 & r_state %~ act p'
297 & r_history %~ (p' :)
298 {-
299 -- Ideally we would like to check these properties. However:
300 -- * They should be checked only to debug the code. The client data
301 -- should be able to trigger these.
302 -- * What kind of error should they throw (we are in IO here)?
303 -- * Should we keep modifyMVar?
304 -- * Should we throw the validation in an Exception, catch it around
305 -- modifyMVar and throw it back as an Error?
306 assertValid $ transformable p q
307 assertValid $ applicable p' (r ^. r_state)
308 -}
309 pure (r', Versioned (r' ^. r_version) q')
310
311 saveRepo
312
313 -- Save new ngrams
314 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
315
316 pure vq'
317
318 -- This is a special case of tableNgramsPut where the input patch is empty.
319 tableNgramsPull :: RepoCmdM env err m
320 => ListId
321 -> TableNgrams.NgramsType
322 -> Version
323 -> m (Versioned NgramsTablePatch)
324 tableNgramsPull listId ngramsType p_version = do
325 var <- view repoVar
326 r <- liftBase $ readMVar var
327
328 let
329 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
330 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
331
332 pure (Versioned (r ^. r_version) q_table)
333
334 -- Apply the given patch to the DB and returns the patch to be applied on the
335 -- client.
336 -- TODO-ACCESS check
337 tableNgramsPut :: ( FlowCmdM env err m
338 , HasSettings env
339 )
340 => TabType
341 -> ListId
342 -> Versioned NgramsTablePatch
343 -> m (Versioned NgramsTablePatch)
344 tableNgramsPut tabType listId (Versioned p_version p_table)
345 | p_table == mempty = do
346 let ngramsType = ngramsTypeFromTabType tabType
347 tableNgramsPull listId ngramsType p_version
348
349 | otherwise = do
350 let ngramsType = ngramsTypeFromTabType tabType
351 (p0, p0_validity) = PM.singleton listId p_table
352 (p, p_validity) = PM.singleton ngramsType p0
353
354 assertValid p0_validity
355 assertValid p_validity
356
357 ret <- commitStatePatch (Versioned p_version p)
358 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
359
360 pure ret
361
362
363 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
364 , HasNodeError err
365 , HasSettings env
366 )
367 => UpdateTableNgramsCharts
368 -> (JobLog -> m ())
369 -> m JobLog
370 tableNgramsPostChartsAsync utn logStatus = do
371 let tabType = utn ^. utn_tab_type
372 let listId = utn ^. utn_list_id
373
374 node <- getNode listId
375 let nId = node ^. node_id
376 _uId = node ^. node_userId
377 mCId = node ^. node_parentId
378
379 printDebug "[tableNgramsPut] tabType" tabType
380 printDebug "[tableNgramsPut] listId" listId
381
382 case mCId of
383 Nothing -> do
384 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
385 pure $ jobLogFail $ jobLogInit 1
386 Just cId -> do
387 case tabType of
388 Authors -> do
389 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
390 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
391 logRef
392 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
393 logRefSuccess
394
395 getRef
396 Institutes -> do
397 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
398 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
399 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
400 logRef
401 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
402 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
403 logRefSuccess
404 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
405 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
406 logRefSuccess
407 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
408 logRefSuccess
409
410 getRef
411 Sources -> do
412 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
413 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
414 logRef
415 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
416 logRefSuccess
417
418 getRef
419 Terms -> do
420 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
421 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
422 logRef
423 {-
424 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
425 logRefSuccess
426 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
427 logRefSuccess
428 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
429 logRefSuccess
430 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
431 logRefSuccess
432 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
433 logRefSuccess
434 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
435 -}
436 logRefSuccess
437
438 getRef
439 _ -> do
440 printDebug "[tableNgramsPut] no update for tabType = " tabType
441 pure $ jobLogFail $ jobLogInit 1
442
443 {-
444 { _ne_list :: ListType
445 If we merge the parents/children we can potentially create cycles!
446 , _ne_parent :: Maybe NgramsTerm
447 , _ne_children :: MSet NgramsTerm
448 }
449 -}
450
451 getNgramsTableMap :: RepoCmdM env err m
452 => NodeId
453 -> TableNgrams.NgramsType
454 -> m (Versioned NgramsTableMap)
455 getNgramsTableMap nodeId ngramsType = do
456 v <- view repoVar
457 repo <- liftBase $ readMVar v
458 pure $ Versioned (repo ^. r_version)
459 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
460
461 dumpJsonTableMap :: RepoCmdM env err m
462 => Text
463 -> NodeId
464 -> TableNgrams.NgramsType
465 -> m ()
466 dumpJsonTableMap fpath nodeId ngramsType = do
467 m <- getNgramsTableMap nodeId ngramsType
468 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
469 pure ()
470
471 type MinSize = Int
472 type MaxSize = Int
473
474 -- | TODO Errors management
475 -- TODO: polymorphic for Annuaire or Corpus or ...
476 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
477 -- TODO: should take only one ListId
478
479 getTime' :: MonadBase IO m => m TimeSpec
480 getTime' = liftBase $ getTime ProcessCPUTime
481
482
483 getTableNgrams :: forall env err m.
484 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
485 => NodeType -> NodeId -> TabType
486 -> ListId -> Limit -> Maybe Offset
487 -> Maybe ListType
488 -> Maybe MinSize -> Maybe MaxSize
489 -> Maybe OrderBy
490 -> (NgramsTerm -> Bool)
491 -> m (VersionedWithCount NgramsTable)
492 getTableNgrams _nType nId tabType listId limit_ offset
493 listType minSize maxSize orderBy searchQuery = do
494
495 t0 <- getTime'
496 -- lIds <- selectNodesWithUsername NodeList userMaster
497 let
498 ngramsType = ngramsTypeFromTabType tabType
499 offset' = maybe 0 identity offset
500 listType' = maybe (const True) (==) listType
501 minSize' = maybe (const True) (<=) minSize
502 maxSize' = maybe (const True) (>=) maxSize
503
504 selected_node n = minSize' s
505 && maxSize' s
506 && searchQuery (n ^. ne_ngrams)
507 && listType' (n ^. ne_list)
508 where
509 s = n ^. ne_size
510
511 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
512
513 ---------------------------------------
514 sortOnOrder Nothing = identity
515 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
516 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
517 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
518 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
519
520 ---------------------------------------
521
522 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
523 filteredNodes tableMap = rootOf <$> list & filter selected_node
524 where
525 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
526 (ne ^. ne_root)
527 list = tableMap ^.. each
528
529 ---------------------------------------
530 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
531 selectAndPaginate tableMap = roots <> inners
532 where
533 list = tableMap ^.. each
534 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
535 (ne ^. ne_root)
536 selected_nodes = list & take limit_
537 . drop offset'
538 . filter selected_node
539 . sortOnOrder orderBy
540 roots = rootOf <$> selected_nodes
541 rootsSet = Set.fromList (_ne_ngrams <$> roots)
542 inners = list & filter (selected_inner rootsSet)
543
544 ---------------------------------------
545 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
546 setScores False table = pure table
547 setScores True table = do
548 let ngrams_terms = table ^.. each . ne_ngrams
549 t1 <- getTime'
550 occurrences <- getOccByNgramsOnlyFast' nId
551 listId
552 ngramsType
553 ngrams_terms
554 t2 <- getTime'
555 liftBase $ hprint stderr
556 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
557 (length ngrams_terms) t1 t2
558 {-
559 occurrences <- getOccByNgramsOnlySlow nType nId
560 (lIds <> [listId])
561 ngramsType
562 ngrams_terms
563 -}
564 let
565 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
566
567 pure $ table & each %~ setOcc
568 ---------------------------------------
569
570 -- lists <- catMaybes <$> listsWith userMaster
571 -- trace (show lists) $
572 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
573
574
575 let scoresNeeded = needsScores orderBy
576 tableMap1 <- getNgramsTableMap listId ngramsType
577 t1 <- getTime'
578 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
579 . Map.mapWithKey ngramsElementFromRepo
580
581 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
582 . filteredNodes
583 let fltrCount = length $ fltr ^. v_data . _NgramsTable
584
585 t2 <- getTime'
586 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
587 . setScores (not scoresNeeded)
588 . selectAndPaginate
589 t3 <- getTime'
590 liftBase $ hprint stderr
591 ("getTableNgrams total=" % timeSpecs
592 % " map1=" % timeSpecs
593 % " map2=" % timeSpecs
594 % " map3=" % timeSpecs
595 % " sql=" % (if scoresNeeded then "map2" else "map3")
596 % "\n"
597 ) t0 t3 t0 t1 t1 t2 t2 t3
598 pure $ toVersionedWithCount fltrCount tableMap3
599
600
601 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
602 scoresRecomputeTableNgrams nId tabType listId = do
603 tableMap <- getNgramsTableMap listId ngramsType
604 _ <- tableMap & v_data %%~ setScores
605 . Map.mapWithKey ngramsElementFromRepo
606
607 pure $ 1
608 where
609 ngramsType = ngramsTypeFromTabType tabType
610
611 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
612 setScores table = do
613 let ngrams_terms = table ^.. each . ne_ngrams
614 occurrences <- getOccByNgramsOnlyFast' nId
615 listId
616 ngramsType
617 ngrams_terms
618 let
619 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
620
621 pure $ table & each %~ setOcc
622
623
624
625 -- APIs
626
627 -- TODO: find a better place for the code above, All APIs stay here
628
629 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
630 deriving (Generic, Enum, Bounded, Read, Show)
631
632 instance FromHttpApiData OrderBy
633 where
634 parseUrlPiece "TermAsc" = pure TermAsc
635 parseUrlPiece "TermDesc" = pure TermDesc
636 parseUrlPiece "ScoreAsc" = pure ScoreAsc
637 parseUrlPiece "ScoreDesc" = pure ScoreDesc
638 parseUrlPiece _ = Left "Unexpected value of OrderBy"
639
640
641 instance ToParamSchema OrderBy
642 instance FromJSON OrderBy
643 instance ToJSON OrderBy
644 instance ToSchema OrderBy
645 instance Arbitrary OrderBy
646 where
647 arbitrary = elements [minBound..maxBound]
648
649 needsScores :: Maybe OrderBy -> Bool
650 needsScores (Just ScoreAsc) = True
651 needsScores (Just ScoreDesc) = True
652 needsScores _ = False
653
654 type TableNgramsApiGet = Summary " Table Ngrams API Get"
655 :> QueryParamR "ngramsType" TabType
656 :> QueryParamR "list" ListId
657 :> QueryParamR "limit" Limit
658 :> QueryParam "offset" Offset
659 :> QueryParam "listType" ListType
660 :> QueryParam "minTermSize" MinSize
661 :> QueryParam "maxTermSize" MaxSize
662 :> QueryParam "orderBy" OrderBy
663 :> QueryParam "search" Text
664 :> Get '[JSON] (VersionedWithCount NgramsTable)
665
666 type TableNgramsApiPut = Summary " Table Ngrams API Change"
667 :> QueryParamR "ngramsType" TabType
668 :> QueryParamR "list" ListId
669 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
670 :> Put '[JSON] (Versioned NgramsTablePatch)
671
672 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
673 :> QueryParamR "ngramsType" TabType
674 :> QueryParamR "list" ListId
675 :> "recompute" :> Post '[JSON] Int
676
677 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
678 :> QueryParamR "ngramsType" TabType
679 :> QueryParamR "list" ListId
680 :> Get '[JSON] Version
681
682 type TableNgramsApi = TableNgramsApiGet
683 :<|> TableNgramsApiPut
684 :<|> RecomputeScoresNgramsApiGet
685 :<|> "version" :> TableNgramsApiGetVersion
686 :<|> TableNgramsAsyncApi
687
688 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
689 :> "async"
690 :> "charts"
691 :> "update"
692 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
693
694 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
695 => NodeId
696 -> TabType
697 -> ListId
698 -> Limit
699 -> Maybe Offset
700 -> Maybe ListType
701 -> Maybe MinSize -> Maybe MaxSize
702 -> Maybe OrderBy
703 -> Maybe Text -- full text search
704 -> m (VersionedWithCount NgramsTable)
705 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
706 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
707 where
708 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
709
710 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
711 => NodeId
712 -> TabType
713 -> ListId
714 -> m Version
715 getTableNgramsVersion _nId _tabType _listId = currentVersion
716 -- TODO: limit?
717 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
718 -- This line above looks like a waste of computation to finally get only the version.
719 -- See the comment about listNgramsChangedSince.
720
721
722 -- | Text search is deactivated for now for ngrams by doc only
723 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
724 => DocId -> TabType
725 -> ListId -> Limit -> Maybe Offset
726 -> Maybe ListType
727 -> Maybe MinSize -> Maybe MaxSize
728 -> Maybe OrderBy
729 -> Maybe Text -- full text search
730 -> m (VersionedWithCount NgramsTable)
731 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
732 ns <- selectNodesWithUsername NodeList userMaster
733 let ngramsType = ngramsTypeFromTabType tabType
734 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
735 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
736 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
737
738
739
740 apiNgramsTableCorpus :: ( GargServerC env err m
741 )
742 => NodeId -> ServerT TableNgramsApi m
743 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
744 :<|> tableNgramsPut
745 :<|> scoresRecomputeTableNgrams cId
746 :<|> getTableNgramsVersion cId
747 :<|> apiNgramsAsync cId
748
749 apiNgramsTableDoc :: ( GargServerC env err m
750 )
751 => DocId -> ServerT TableNgramsApi m
752 apiNgramsTableDoc dId = getTableNgramsDoc dId
753 :<|> tableNgramsPut
754 :<|> scoresRecomputeTableNgrams dId
755 :<|> getTableNgramsVersion dId
756 :<|> apiNgramsAsync dId
757 -- > index all the corpus accordingly (TODO AD)
758
759 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
760 apiNgramsAsync _dId =
761 serveJobsAPI $
762 JobFunction $ \i log ->
763 let
764 log' x = do
765 printDebug "tableNgramsPostChartsAsync" x
766 liftBase $ log x
767 in tableNgramsPostChartsAsync i log'
768
769 -- Did the given list of ngrams changed since the given version?
770 -- The returned value is versioned boolean value, meaning that one always retrieve the
771 -- latest version.
772 -- If the given version is negative then one simply receive the latest version and True.
773 -- Using this function is more precise than simply comparing the latest version number
774 -- with the local version number. Indeed there might be no change to this particular list
775 -- and still the version number has changed because of other lists.
776 --
777 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
778 -- * currentVersion: good computation, good bandwidth, bad precision.
779 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
780 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
781 listNgramsChangedSince :: RepoCmdM env err m
782 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
783 listNgramsChangedSince listId ngramsType version
784 | version < 0 =
785 Versioned <$> currentVersion <*> pure True
786 | otherwise =
787 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)