]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev-refactor-metrics' of ssh://gitlab.iscpif.fr:20022/gargantext/haskel...
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , currentVersion
82 , listNgramsChangedSince
83 )
84 where
85
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
88 import Control.Monad.Reader
89 import Data.Aeson hiding ((.=))
90 import qualified Data.Aeson.Text as DAT
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import qualified Data.List as List
94 import Data.Map.Strict (Map)
95 import qualified Data.Map.Strict as Map
96 import qualified Data.Map.Strict.Patch as PM
97 import Data.Maybe (fromMaybe)
98 import Data.Monoid
99 import Data.Ord (Down(..))
100 import Data.Patch.Class (Action(act), Transformable(..), ours)
101 import qualified Data.Set as S
102 import qualified Data.Set as Set
103 import Data.Swagger hiding (version, patch)
104 import Data.Text (Text, isInfixOf, unpack)
105 import Data.Text.Lazy.IO as DTL
106 import Formatting (hprint, int, (%))
107 import Formatting.Clock (timeSpecs)
108 import GHC.Generics (Generic)
109 import Servant hiding (Patch)
110 import System.Clock (getTime, TimeSpec, Clock(..))
111 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
112 import System.IO (stderr)
113 import Test.QuickCheck (elements)
114 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
115
116 import Prelude (error)
117 import Gargantext.Prelude hiding (log)
118
119 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
120 import Gargantext.API.Admin.Types (HasSettings)
121 import qualified Gargantext.API.Metrics as Metrics
122 import Gargantext.API.Ngrams.Types
123 import Gargantext.API.Prelude
124 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
125 import Gargantext.Core.Utils (something)
126 -- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
127 -- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
128 import Gargantext.Database.Action.Flow.Types
129 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
130 import Gargantext.Database.Admin.Config (userMaster)
131 import Gargantext.Database.Admin.Types.Node (NodeType(..))
132 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
133 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
134 import Gargantext.Database.Query.Table.Node.Select
135 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
137 import Gargantext.Database.Query.Table.Node (getNode)
138 import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
139 import Gargantext.Prelude.Job
140
141 {-
142 -- TODO sequences of modifications (Patchs)
143 type NgramsIdPatch = Patch NgramsId NgramsPatch
144
145 ngramsPatch :: Int -> NgramsPatch
146 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
147
148 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
149 toEdit n p = Edit n p
150 ngramsIdPatch :: Patch NgramsId NgramsPatch
151 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
152 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
153 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
154 ]
155
156 -- applyPatchBack :: Patch -> IO Patch
157 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
158 -}
159 ------------------------------------------------------------------------
160 ------------------------------------------------------------------------
161 ------------------------------------------------------------------------
162
163 {-
164 -- TODO: Replace.old is ignored which means that if the current list
165 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
166 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
167 -- However this should not happen in non conflicting situations.
168 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
169 mkListsUpdate nt patches =
170 [ (ngramsTypeId nt, ng, listTypeId lt)
171 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
172 , lt <- patch ^.. patch_list . new
173 ]
174
175 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
176 -> NgramsType
177 -> NgramsTablePatch
178 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
179 mkChildrenGroups addOrRem nt patches =
180 [ (ngramsTypeId nt, parent, child)
181 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
182 , child <- patch ^.. patch_children . to addOrRem . folded
183 ]
184 -}
185
186 ------------------------------------------------------------------------
187
188 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
189 => m ()
190 saveRepo = liftBase =<< view repoSaver
191
192 listTypeConflictResolution :: ListType -> ListType -> ListType
193 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
194
195 ngramsStatePatchConflictResolution
196 :: TableNgrams.NgramsType
197 -> NodeId
198 -> NgramsTerm
199 -> ConflictResolutionNgramsPatch
200 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
201 = (ours, (const ours, ours), (False, False))
202 -- (False, False) mean here that Mod has always priority.
203 -- (True, False) <- would mean priority to the left (same as ours).
204
205 -- undefined {- TODO think this through -}, listTypeConflictResolution)
206
207 -- Current state:
208 -- Insertions are not considered as patches,
209 -- they do not extend history,
210 -- they do not bump version.
211 insertNewOnly :: a -> Maybe b -> a
212 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
213 -- TODO error handling
214
215 {- unused
216 -- TODO refactor with putListNgrams
217 copyListNgrams :: RepoCmdM env err m
218 => NodeId -> NodeId -> NgramsType
219 -> m ()
220 copyListNgrams srcListId dstListId ngramsType = do
221 var <- view repoVar
222 liftBase $ modifyMVar_ var $
223 pure . (r_state . at ngramsType %~ (Just . f . something))
224 saveRepo
225 where
226 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
227 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
228
229 -- TODO refactor with putListNgrams
230 -- The list must be non-empty!
231 -- The added ngrams must be non-existent!
232 addListNgrams :: RepoCmdM env err m
233 => NodeId -> NgramsType
234 -> [NgramsElement] -> m ()
235 addListNgrams listId ngramsType nes = do
236 var <- view repoVar
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
239 saveRepo
240 where
241 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
242 -}
243
244 -- UNSAFE
245 rmListNgrams :: RepoCmdM env err m
246 => ListId
247 -> TableNgrams.NgramsType
248 -> m ()
249 rmListNgrams l nt = setListNgrams l nt mempty
250
251 -- | TODO: incr the Version number
252 -- && should use patch
253 -- UNSAFE
254 setListNgrams :: RepoCmdM env err m
255 => NodeId
256 -> TableNgrams.NgramsType
257 -> Map NgramsTerm NgramsRepoElement
258 -> m ()
259 setListNgrams listId ngramsType ns = do
260 var <- view repoVar
261 liftBase $ modifyMVar_ var $
262 pure . ( r_state
263 . at ngramsType %~
264 (Just .
265 (at listId .~ ( Just ns))
266 . something
267 )
268 )
269 saveRepo
270
271
272 currentVersion :: RepoCmdM env err m
273 => m Version
274 currentVersion = do
275 var <- view repoVar
276 r <- liftBase $ readMVar var
277 pure $ r ^. r_version
278
279 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
280 newNgramsFromNgramsStatePatch p =
281 [ text2ngrams (unNgramsTerm n)
282 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
283 , _ <- np ^.. patch_new . _Just
284 ]
285
286 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
287 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
288 commitStatePatch (Versioned p_version p) = do
289 var <- view repoVar
290 vq' <- liftBase $ modifyMVar var $ \r -> do
291 let
292 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
293 (p', q') = transformWith ngramsStatePatchConflictResolution p q
294 r' = r & r_version +~ 1
295 & r_state %~ act p'
296 & r_history %~ (p' :)
297 {-
298 -- Ideally we would like to check these properties. However:
299 -- * They should be checked only to debug the code. The client data
300 -- should be able to trigger these.
301 -- * What kind of error should they throw (we are in IO here)?
302 -- * Should we keep modifyMVar?
303 -- * Should we throw the validation in an Exception, catch it around
304 -- modifyMVar and throw it back as an Error?
305 assertValid $ transformable p q
306 assertValid $ applicable p' (r ^. r_state)
307 -}
308 pure (r', Versioned (r' ^. r_version) q')
309
310 saveRepo
311
312 -- Save new ngrams
313 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
314
315 pure vq'
316
317 -- This is a special case of tableNgramsPut where the input patch is empty.
318 tableNgramsPull :: RepoCmdM env err m
319 => ListId
320 -> TableNgrams.NgramsType
321 -> Version
322 -> m (Versioned NgramsTablePatch)
323 tableNgramsPull listId ngramsType p_version = do
324 var <- view repoVar
325 r <- liftBase $ readMVar var
326
327 let
328 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
329 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
330
331 pure (Versioned (r ^. r_version) q_table)
332
333 -- Apply the given patch to the DB and returns the patch to be applied on the
334 -- client.
335 -- TODO-ACCESS check
336 tableNgramsPut :: ( FlowCmdM env err m
337 , HasSettings env
338 )
339 => TabType
340 -> ListId
341 -> Versioned NgramsTablePatch
342 -> m (Versioned NgramsTablePatch)
343 tableNgramsPut tabType listId (Versioned p_version p_table)
344 | p_table == mempty = do
345 let ngramsType = ngramsTypeFromTabType tabType
346 tableNgramsPull listId ngramsType p_version
347
348 | otherwise = do
349 let ngramsType = ngramsTypeFromTabType tabType
350 (p0, p0_validity) = PM.singleton listId p_table
351 (p, p_validity) = PM.singleton ngramsType p0
352
353 assertValid p0_validity
354 assertValid p_validity
355
356 ret <- commitStatePatch (Versioned p_version p)
357 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
358
359 pure ret
360
361
362 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
363 , HasNodeError err
364 , HasSettings env
365 )
366 => UpdateTableNgramsCharts
367 -> (JobLog -> m ())
368 -> m JobLog
369 tableNgramsPostChartsAsync utn logStatus = do
370 let tabType = utn ^. utn_tab_type
371 let listId = utn ^. utn_list_id
372
373 node <- getNode listId
374 let nId = node ^. node_id
375 _uId = node ^. node_userId
376 mCId = node ^. node_parentId
377
378 printDebug "[tableNgramsPut] tabType" tabType
379 printDebug "[tableNgramsPut] listId" listId
380
381 case mCId of
382 Nothing -> do
383 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
384 pure $ jobLogFail $ jobLogInit 1
385 Just cId -> do
386 case tabType of
387 Authors -> do
388 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
389 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
390 logRef
391 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
392 logRefSuccess
393
394 getRef
395 Institutes -> do
396 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
397 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
398 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
399 logRef
400 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
401 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
402 logRefSuccess
403 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
404 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
405 logRefSuccess
406 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
407 logRefSuccess
408
409 getRef
410 Sources -> do
411 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
412 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
413 logRef
414 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
415 logRefSuccess
416
417 getRef
418 Terms -> do
419 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
420 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
421 logRef
422 {-
423 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
424 logRefSuccess
425 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
426 logRefSuccess
427 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
428 logRefSuccess
429 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
430 logRefSuccess
431 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
432 logRefSuccess
433 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
434 -}
435 logRefSuccess
436
437 getRef
438 _ -> do
439 printDebug "[tableNgramsPut] no update for tabType = " tabType
440 pure $ jobLogFail $ jobLogInit 1
441
442 {-
443 { _ne_list :: ListType
444 If we merge the parents/children we can potentially create cycles!
445 , _ne_parent :: Maybe NgramsTerm
446 , _ne_children :: MSet NgramsTerm
447 }
448 -}
449
450 getNgramsTableMap :: RepoCmdM env err m
451 => NodeId
452 -> TableNgrams.NgramsType
453 -> m (Versioned NgramsTableMap)
454 getNgramsTableMap nodeId ngramsType = do
455 v <- view repoVar
456 repo <- liftBase $ readMVar v
457 pure $ Versioned (repo ^. r_version)
458 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
459
460 dumpJsonTableMap :: RepoCmdM env err m
461 => Text
462 -> NodeId
463 -> TableNgrams.NgramsType
464 -> m ()
465 dumpJsonTableMap fpath nodeId ngramsType = do
466 m <- getNgramsTableMap nodeId ngramsType
467 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
468 pure ()
469
470 type MinSize = Int
471 type MaxSize = Int
472
473 -- | TODO Errors management
474 -- TODO: polymorphic for Annuaire or Corpus or ...
475 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
476 -- TODO: should take only one ListId
477
478 getTime' :: MonadBase IO m => m TimeSpec
479 getTime' = liftBase $ getTime ProcessCPUTime
480
481
482 getTableNgrams :: forall env err m.
483 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
484 => NodeType -> NodeId -> TabType
485 -> ListId -> Limit -> Maybe Offset
486 -> Maybe ListType
487 -> Maybe MinSize -> Maybe MaxSize
488 -> Maybe OrderBy
489 -> (NgramsTerm -> Bool)
490 -> m (Versioned NgramsTable)
491 getTableNgrams _nType nId tabType listId limit_ offset
492 listType minSize maxSize orderBy searchQuery = do
493
494 t0 <- getTime'
495 -- lIds <- selectNodesWithUsername NodeList userMaster
496 let
497 ngramsType = ngramsTypeFromTabType tabType
498 offset' = maybe 0 identity offset
499 listType' = maybe (const True) (==) listType
500 minSize' = maybe (const True) (<=) minSize
501 maxSize' = maybe (const True) (>=) maxSize
502
503 selected_node n = minSize' s
504 && maxSize' s
505 && searchQuery (n ^. ne_ngrams)
506 && listType' (n ^. ne_list)
507 where
508 s = n ^. ne_size
509
510 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
511
512 ---------------------------------------
513 sortOnOrder Nothing = identity
514 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
515 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
516 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
517 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
518
519 ---------------------------------------
520 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
521 selectAndPaginate tableMap = roots <> inners
522 where
523 list = tableMap ^.. each
524 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
525 (ne ^. ne_root)
526 selected_nodes = list & take limit_
527 . drop offset'
528 . filter selected_node
529 . sortOnOrder orderBy
530 roots = rootOf <$> selected_nodes
531 rootsSet = Set.fromList (_ne_ngrams <$> roots)
532 inners = list & filter (selected_inner rootsSet)
533
534 ---------------------------------------
535 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
536 setScores False table = pure table
537 setScores True table = do
538 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
539 t1 <- getTime'
540 occurrences <- getOccByNgramsOnlyFast' nId
541 listId
542 ngramsType
543 ngrams_terms
544 t2 <- getTime'
545 liftBase $ hprint stderr
546 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
547 (length ngrams_terms) t1 t2
548 {-
549 occurrences <- getOccByNgramsOnlySlow nType nId
550 (lIds <> [listId])
551 ngramsType
552 ngrams_terms
553 -}
554 let
555 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
556
557 pure $ table & each %~ setOcc
558 ---------------------------------------
559
560 -- lists <- catMaybes <$> listsWith userMaster
561 -- trace (show lists) $
562 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
563
564 let scoresNeeded = needsScores orderBy
565 tableMap1 <- getNgramsTableMap listId ngramsType
566 t1 <- getTime'
567 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
568 . Map.mapWithKey ngramsElementFromRepo
569 t2 <- getTime'
570 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
571 . setScores (not scoresNeeded)
572 . selectAndPaginate
573 t3 <- getTime'
574 liftBase $ hprint stderr
575 ("getTableNgrams total=" % timeSpecs
576 % " map1=" % timeSpecs
577 % " map2=" % timeSpecs
578 % " map3=" % timeSpecs
579 % " sql=" % (if scoresNeeded then "map2" else "map3")
580 % "\n"
581 ) t0 t3 t0 t1 t1 t2 t2 t3
582 pure tableMap3
583
584
585 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
586 scoresRecomputeTableNgrams nId tabType listId = do
587 tableMap <- getNgramsTableMap listId ngramsType
588 _ <- tableMap & v_data %%~ setScores
589 . Map.mapWithKey ngramsElementFromRepo
590
591 pure $ 1
592 where
593 ngramsType = ngramsTypeFromTabType tabType
594
595 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
596 setScores table = do
597 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
598 occurrences <- getOccByNgramsOnlyFast' nId
599 listId
600 ngramsType
601 ngrams_terms
602 let
603 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
604
605 pure $ table & each %~ setOcc
606
607
608
609 -- APIs
610
611 -- TODO: find a better place for the code above, All APIs stay here
612
613 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
614 deriving (Generic, Enum, Bounded, Read, Show)
615
616 instance FromHttpApiData OrderBy
617 where
618 parseUrlPiece "TermAsc" = pure TermAsc
619 parseUrlPiece "TermDesc" = pure TermDesc
620 parseUrlPiece "ScoreAsc" = pure ScoreAsc
621 parseUrlPiece "ScoreDesc" = pure ScoreDesc
622 parseUrlPiece _ = Left "Unexpected value of OrderBy"
623
624
625 instance ToParamSchema OrderBy
626 instance FromJSON OrderBy
627 instance ToJSON OrderBy
628 instance ToSchema OrderBy
629 instance Arbitrary OrderBy
630 where
631 arbitrary = elements [minBound..maxBound]
632
633 needsScores :: Maybe OrderBy -> Bool
634 needsScores (Just ScoreAsc) = True
635 needsScores (Just ScoreDesc) = True
636 needsScores _ = False
637
638 type TableNgramsApiGet = Summary " Table Ngrams API Get"
639 :> QueryParamR "ngramsType" TabType
640 :> QueryParamR "list" ListId
641 :> QueryParamR "limit" Limit
642 :> QueryParam "offset" Offset
643 :> QueryParam "listType" ListType
644 :> QueryParam "minTermSize" MinSize
645 :> QueryParam "maxTermSize" MaxSize
646 :> QueryParam "orderBy" OrderBy
647 :> QueryParam "search" Text
648 :> Get '[JSON] (Versioned NgramsTable)
649
650 type TableNgramsApiPut = Summary " Table Ngrams API Change"
651 :> QueryParamR "ngramsType" TabType
652 :> QueryParamR "list" ListId
653 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
654 :> Put '[JSON] (Versioned NgramsTablePatch)
655
656 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
657 :> QueryParamR "ngramsType" TabType
658 :> QueryParamR "list" ListId
659 :> "recompute" :> Post '[JSON] Int
660
661 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
662 :> QueryParamR "ngramsType" TabType
663 :> QueryParamR "list" ListId
664 :> Get '[JSON] Version
665
666 type TableNgramsApi = TableNgramsApiGet
667 :<|> TableNgramsApiPut
668 :<|> RecomputeScoresNgramsApiGet
669 :<|> "version" :> TableNgramsApiGetVersion
670 :<|> TableNgramsAsyncApi
671
672 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
673 :> "async"
674 :> "charts"
675 :> "update"
676 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
677
678 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
679 => NodeId
680 -> TabType
681 -> ListId
682 -> Limit
683 -> Maybe Offset
684 -> Maybe ListType
685 -> Maybe MinSize -> Maybe MaxSize
686 -> Maybe OrderBy
687 -> Maybe Text -- full text search
688 -> m (Versioned NgramsTable)
689 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
690 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
691 where
692 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
693
694 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
695 => NodeId
696 -> TabType
697 -> ListId
698 -> m Version
699 getTableNgramsVersion _nId _tabType _listId = currentVersion
700 -- TODO: limit?
701 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
702 -- This line above looks like a waste of computation to finally get only the version.
703 -- See the comment about listNgramsChangedSince.
704
705
706 -- | Text search is deactivated for now for ngrams by doc only
707 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
708 => DocId -> TabType
709 -> ListId -> Limit -> Maybe Offset
710 -> Maybe ListType
711 -> Maybe MinSize -> Maybe MaxSize
712 -> Maybe OrderBy
713 -> Maybe Text -- full text search
714 -> m (Versioned NgramsTable)
715 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
716 ns <- selectNodesWithUsername NodeList userMaster
717 let ngramsType = ngramsTypeFromTabType tabType
718 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
719 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
720 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
721
722
723
724 apiNgramsTableCorpus :: ( GargServerC env err m
725 )
726 => NodeId -> ServerT TableNgramsApi m
727 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
728 :<|> tableNgramsPut
729 :<|> scoresRecomputeTableNgrams cId
730 :<|> getTableNgramsVersion cId
731 :<|> apiNgramsAsync cId
732
733 apiNgramsTableDoc :: ( GargServerC env err m
734 )
735 => DocId -> ServerT TableNgramsApi m
736 apiNgramsTableDoc dId = getTableNgramsDoc dId
737 :<|> tableNgramsPut
738 :<|> scoresRecomputeTableNgrams dId
739 :<|> getTableNgramsVersion dId
740 :<|> apiNgramsAsync dId
741 -- > index all the corpus accordingly (TODO AD)
742
743 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
744 apiNgramsAsync _dId =
745 serveJobsAPI $
746 JobFunction $ \i log ->
747 let
748 log' x = do
749 printDebug "tableNgramsPostChartsAsync" x
750 liftBase $ log x
751 in tableNgramsPostChartsAsync i log'
752
753 -- Did the given list of ngrams changed since the given version?
754 -- The returned value is versioned boolean value, meaning that one always retrieve the
755 -- latest version.
756 -- If the given version is negative then one simply receive the latest version and True.
757 -- Using this function is more precise than simply comparing the latest version number
758 -- with the local version number. Indeed there might be no change to this particular list
759 -- and still the version number has changed because of other lists.
760 --
761 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
762 -- * currentVersion: good computation, good bandwidth, bad precision.
763 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
764 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
765 listNgramsChangedSince :: RepoCmdM env err m
766 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
767 listNgramsChangedSince listId ngramsType version
768 | version < 0 =
769 Versioned <$> currentVersion <*> pure True
770 | otherwise =
771 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)