]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FIX] removing warning msg
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
90 import Data.Foldable
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
93 import Data.Monoid
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Job
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Mail.Types (HasMail)
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Database.Action.Flow.Types
112 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
113 import Gargantext.Database.Admin.Config (userMaster)
114 import Gargantext.Database.Admin.Types.Node (NodeType(..))
115 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
116 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
117 import Gargantext.Database.Query.Table.Node (getNode)
118 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
119 import Gargantext.Database.Query.Table.Node.Select
120 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
121 import Gargantext.Prelude hiding (log)
122 import Gargantext.Prelude.Clock (hasTime, getTime)
123 import Prelude (error)
124 import Servant hiding (Patch)
125 import Gargantext.Utils.Jobs (serveJobsAPI)
126 import System.IO (stderr)
127 import Test.QuickCheck (elements)
128 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
129 import qualified Data.Aeson.Text as DAT
130 import qualified Data.List as List
131 import qualified Data.Map.Strict as Map
132 import qualified Data.Map.Strict.Patch as PM
133 import qualified Data.Set as S
134 import qualified Data.Set as Set
135 import qualified Gargantext.API.Metrics as Metrics
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
137
138 {-
139 -- TODO sequences of modifications (Patchs)
140 type NgramsIdPatch = Patch NgramsId NgramsPatch
141
142 ngramsPatch :: Int -> NgramsPatch
143 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
144
145 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
146 toEdit n p = Edit n p
147 ngramsIdPatch :: Patch NgramsId NgramsPatch
148 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
149 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
150 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
151 ]
152
153 -- applyPatchBack :: Patch -> IO Patch
154 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
155 -}
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
159
160 {-
161 -- TODO: Replace.old is ignored which means that if the current list
162 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
163 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
164 -- However this should not happen in non conflicting situations.
165 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
166 mkListsUpdate nt patches =
167 [ (ngramsTypeId nt, ng, listTypeId lt)
168 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , lt <- patch ^.. patch_list . new
170 ]
171
172 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
173 -> NgramsType
174 -> NgramsTablePatch
175 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
176 mkChildrenGroups addOrRem nt patches =
177 [ (ngramsTypeId nt, parent, child)
178 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
179 , child <- patch ^.. patch_children . to addOrRem . folded
180 ]
181 -}
182
183 ------------------------------------------------------------------------
184
185 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
186 => m ()
187 saveNodeStory = do
188 saver <- view hasNodeStorySaver
189 liftBase $ do
190 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
191 saver
192 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
193
194
195 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
196 => m ()
197 saveNodeStoryImmediate = do
198 saver <- view hasNodeStoryImmediateSaver
199 liftBase $ do
200 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
201 saver
202 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
203
204 listTypeConflictResolution :: ListType -> ListType -> ListType
205 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
206
207
208 ngramsStatePatchConflictResolution
209 :: TableNgrams.NgramsType
210 -> NgramsTerm
211 -> ConflictResolutionNgramsPatch
212 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
213 = (ours, (const ours, ours), (False, False))
214 -- (False, False) mean here that Mod has always priority.
215 -- = (ours, (const ours, ours), (True, False))
216 -- (True, False) <- would mean priority to the left (same as ours).
217 -- undefined {- TODO think this through -}, listTypeConflictResolution)
218
219
220
221
222 -- Current state:
223 -- Insertions are not considered as patches,
224 -- they do not extend history,
225 -- they do not bump version.
226 insertNewOnly :: a -> Maybe b -> a
227 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
228 -- TODO error handling
229
230 {- unused
231 -- TODO refactor with putListNgrams
232 copyListNgrams :: RepoCmdM env err m
233 => NodeId -> NodeId -> NgramsType
234 -> m ()
235 copyListNgrams srcListId dstListId ngramsType = do
236 var <- view repoVar
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType %~ (Just . f . something))
239 saveNodeStory
240 where
241 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
242 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
243
244 -- TODO refactor with putListNgrams
245 -- The list must be non-empty!
246 -- The added ngrams must be non-existent!
247 addListNgrams :: RepoCmdM env err m
248 => NodeId -> NgramsType
249 -> [NgramsElement] -> m ()
250 addListNgrams listId ngramsType nes = do
251 var <- view repoVar
252 liftBase $ modifyMVar_ var $
253 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
254 saveNodeStory
255 where
256 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
257 -}
258
259 -- | TODO: incr the Version number
260 -- && should use patch
261 -- UNSAFE
262
263 setListNgrams :: HasNodeStory env err m
264 => NodeId
265 -> TableNgrams.NgramsType
266 -> Map NgramsTerm NgramsRepoElement
267 -> m ()
268 setListNgrams listId ngramsType ns = do
269 -- printDebug "[setListNgrams]" (listId, ngramsType)
270 getter <- view hasNodeStory
271 var <- liftBase $ (getter ^. nse_getter) [listId]
272 liftBase $ modifyMVar_ var $
273 pure . ( unNodeStory
274 . at listId . _Just
275 . a_state
276 . at ngramsType
277 .~ Just ns
278 )
279 saveNodeStory
280
281
282 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
283 newNgramsFromNgramsStatePatch p =
284 [ text2ngrams (unNgramsTerm n)
285 | (n,np) <- p ^.. _PatchMap
286 -- . each . _PatchMap
287 . each . _NgramsTablePatch
288 . _PatchMap . ifolded . withIndex
289 , _ <- np ^.. patch_new . _Just
290 ]
291
292
293
294
295 commitStatePatch :: ( HasNodeStory env err m
296 , HasNodeStoryImmediateSaver env
297 , HasNodeArchiveStoryImmediateSaver env
298 , HasMail env)
299 => ListId
300 -> Versioned NgramsStatePatch'
301 -> m (Versioned NgramsStatePatch')
302 commitStatePatch listId (Versioned _p_version p) = do
303 -- printDebug "[commitStatePatch]" listId
304 var <- getNodeStoryVar [listId]
305 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
306 vq' <- liftBase $ modifyMVar var $ \ns -> do
307 let
308 a = ns ^. unNodeStory . at listId . _Just
309 -- apply patches from version p_version to a ^. a_version
310 -- TODO Check this
311 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
312 q = mconcat $ a ^. a_history
313
314 printDebug "transformWith" (p,q)
315
316 let
317 (p', q') = transformWith ngramsStatePatchConflictResolution p q
318 a' = a & a_version +~ 1
319 & a_state %~ act p'
320 & a_history %~ (p' :)
321
322 {-
323 -- Ideally we would like to check these properties. However:
324 -- * They should be checked only to debug the code. The client data
325 -- should be able to trigger these.
326 -- * What kind of error should they throw (we are in IO here)?
327 -- * Should we keep modifyMVar?
328 -- * Should we throw the validation in an Exception, catch it around
329 -- modifyMVar and throw it back as an Error?
330 assertValid $ transformable p q
331 assertValid $ applicable p' (r ^. r_state)
332 -}
333 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
334 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
335 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
336 , Versioned (a' ^. a_version) q'
337 )
338
339 -- NOTE Now is the only good time to save the archive history. We
340 -- have the handle to the MVar and we need to save its exact
341 -- snapshot. Node Story archive is a linear table, so it's only
342 -- couple of inserts, it shouldn't take long...
343
344 -- If we postponed saving the archive to the debounce action, we
345 -- would have issues like
346 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
347 -- where the `q` computation from above (which uses the archive)
348 -- would cause incorrect patch application (before the previous
349 -- archive was saved and applied)
350 newNs' <- archiveSaver $ fst newNs
351
352 pure (newNs', snd newNs)
353
354 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
355 saveNodeStory
356 --saveNodeStoryImmediate
357 -- Save new ngrams
358 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
359
360 pure vq'
361
362
363
364 -- This is a special case of tableNgramsPut where the input patch is empty.
365 tableNgramsPull :: HasNodeStory env err m
366 => ListId
367 -> TableNgrams.NgramsType
368 -> Version
369 -> m (Versioned NgramsTablePatch)
370 tableNgramsPull listId ngramsType p_version = do
371 printDebug "[tableNgramsPull]" (listId, ngramsType)
372 var <- getNodeStoryVar [listId]
373 r <- liftBase $ readMVar var
374
375 let
376 a = r ^. unNodeStory . at listId . _Just
377 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
378 q_table = q ^. _PatchMap . at ngramsType . _Just
379
380 pure (Versioned (a ^. a_version) q_table)
381
382
383
384
385 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
386 -- Apply the given patch to the DB and returns the patch to be applied on the
387 -- client.
388 -- TODO-ACCESS check
389 tableNgramsPut :: ( HasNodeStory env err m
390 , HasNodeStoryImmediateSaver env
391 , HasNodeArchiveStoryImmediateSaver env
392 , HasInvalidError err
393 , HasSettings env
394 , HasMail env
395 )
396 => TabType
397 -> ListId
398 -> Versioned NgramsTablePatch
399 -> m (Versioned NgramsTablePatch)
400 tableNgramsPut tabType listId (Versioned p_version p_table)
401 | p_table == mempty = do
402 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
403 let ngramsType = ngramsTypeFromTabType tabType
404 tableNgramsPull listId ngramsType p_version
405
406 | otherwise = do
407 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
408 let ngramsType = ngramsTypeFromTabType tabType
409 (p, p_validity) = PM.singleton ngramsType p_table
410
411 assertValid p_validity
412
413 ret <- commitStatePatch listId (Versioned p_version p)
414 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
415
416 pure ret
417
418
419
420 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
421 , FlowCmdM env err m
422 , HasNodeError err
423 , HasSettings env
424 )
425 => UpdateTableNgramsCharts
426 -> (JobLog -> m ())
427 -> m JobLog
428 tableNgramsPostChartsAsync utn logStatus = do
429 let tabType = utn ^. utn_tab_type
430 let listId = utn ^. utn_list_id
431
432 node <- getNode listId
433 let nId = node ^. node_id
434 _uId = node ^. node_user_id
435 mCId = node ^. node_parent_id
436
437 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
438 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
439
440 case mCId of
441 Nothing -> do
442 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
443 pure $ jobLogFail $ jobLogInit 1
444 Just cId -> do
445 case tabType of
446 Authors -> do
447 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
448 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
449 logRef
450 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
451 logRefSuccess
452
453 getRef
454 Institutes -> do
455 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
456 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
457 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
458 logRef
459 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
460 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
461 logRefSuccess
462 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
463 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
464 logRefSuccess
465 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
466 logRefSuccess
467
468 getRef
469 Sources -> do
470 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
471 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
472 logRef
473 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
474 logRefSuccess
475
476 getRef
477 Terms -> do
478 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
479 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
480 logRef
481 {-
482 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
483 logRefSuccess
484 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
485 logRefSuccess
486 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
487 logRefSuccess
488 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
489 logRefSuccess
490 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
491 logRefSuccess
492 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
493 -}
494 logRefSuccess
495
496 getRef
497 _ -> do
498 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
499 pure $ jobLogFail $ jobLogInit 1
500
501 {-
502 { _ne_list :: ListType
503 If we merge the parents/children we can potentially create cycles!
504 , _ne_parent :: Maybe NgramsTerm
505 , _ne_children :: MSet NgramsTerm
506 }
507 -}
508
509 getNgramsTableMap :: HasNodeStory env err m
510 => NodeId
511 -> TableNgrams.NgramsType
512 -> m (Versioned NgramsTableMap)
513 getNgramsTableMap nodeId ngramsType = do
514 v <- getNodeStoryVar [nodeId]
515 repo <- liftBase $ readMVar v
516 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
517 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
518
519
520 dumpJsonTableMap :: HasNodeStory env err m
521 => Text
522 -> NodeId
523 -> TableNgrams.NgramsType
524 -> m ()
525 dumpJsonTableMap fpath nodeId ngramsType = do
526 m <- getNgramsTableMap nodeId ngramsType
527 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
528 pure ()
529
530
531 type MinSize = Int
532 type MaxSize = Int
533
534 -- | TODO Errors management
535 -- TODO: polymorphic for Annuaire or Corpus or ...
536 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
537 -- TODO: should take only one ListId
538
539
540 getTableNgrams :: forall env err m.
541 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
542 => NodeType -> NodeId -> TabType
543 -> ListId -> Limit -> Maybe Offset
544 -> Maybe ListType
545 -> Maybe MinSize -> Maybe MaxSize
546 -> Maybe OrderBy
547 -> (NgramsTerm -> Bool)
548 -> m (VersionedWithCount NgramsTable)
549 getTableNgrams _nType nId tabType listId limit_ offset
550 listType minSize maxSize orderBy searchQuery = do
551
552 t0 <- getTime
553 -- lIds <- selectNodesWithUsername NodeList userMaster
554 let
555 ngramsType = ngramsTypeFromTabType tabType
556 offset' = maybe 0 identity offset
557 listType' = maybe (const True) (==) listType
558 minSize' = maybe (const True) (<=) minSize
559 maxSize' = maybe (const True) (>=) maxSize
560
561 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
562 (tableMap ^. at r)
563 )
564 (ne ^. ne_root)
565
566 selected_node n = minSize' s
567 && maxSize' s
568 && searchQuery (n ^. ne_ngrams)
569 && listType' (n ^. ne_list)
570 where
571 s = n ^. ne_size
572
573 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
574
575 ---------------------------------------
576 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
577 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
578 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
579 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to length)
580 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to length)
581
582 ---------------------------------------
583 -- | Filter the given `tableMap` with the search criteria.
584 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
585 filteredNodes tableMap = roots
586 where
587 list = tableMap ^.. each
588 selected_nodes = list & filter selected_node
589 roots = rootOf tableMap <$> selected_nodes
590
591 -- | Appends subitems (selected from `tableMap`) for given `roots`.
592 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
593 withInners tableMap roots = roots <> inners
594 where
595 list = tableMap ^.. each
596 rootSet = Set.fromList (_ne_ngrams <$> roots)
597 inners = list & filter (selected_inner rootSet)
598
599 -- | Paginate the results
600 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
601 sortAndPaginate = take limit_
602 . drop offset'
603 . sortOnOrder orderBy
604
605 ---------------------------------------
606
607 let scoresNeeded = needsScores orderBy
608 t1 <- getTime
609
610 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
611
612 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
613
614 let fltrCount = length $ fltr ^. v_data . _NgramsTable
615
616 t2 <- getTime
617 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
618 t3 <- getTime
619 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
620 liftBase $ do
621 hprint stderr
622 ("getTableNgrams total=" % hasTime
623 % " map1=" % hasTime
624 % " map2=" % hasTime
625 % " map3=" % hasTime
626 % " sql=" % (if scoresNeeded then "map2" else "map3")
627 % "\n"
628 ) t0 t3 t0 t1 t1 t2 t2 t3
629
630 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
631 pure $ toVersionedWithCount fltrCount tableMapSorted
632
633
634 -- | Helper function to get the ngrams table with scores.
635 getNgramsTable' :: forall env err m.
636 ( HasNodeStory env err m
637 , HasNodeError err
638 , HasConnectionPool env
639 , HasConfig env
640 , HasMail env)
641 => NodeId
642 -> ListId
643 -> TableNgrams.NgramsType
644 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
645 getNgramsTable' nId listId ngramsType = do
646 tableMap <- getNgramsTableMap listId ngramsType
647 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
648 . Map.mapWithKey ngramsElementFromRepo
649
650 -- | Helper function to set scores on an `NgramsTable`.
651 setNgramsTableScores :: forall env err m t.
652 ( Each t t NgramsElement NgramsElement
653 , HasNodeStory env err m
654 , HasNodeError err
655 , HasConnectionPool env
656 , HasConfig env
657 , HasMail env)
658 => NodeId
659 -> ListId
660 -> TableNgrams.NgramsType
661 -> t
662 -> m t
663 setNgramsTableScores nId listId ngramsType table = do
664 t1 <- getTime
665 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
666 --printDebug "[setNgramsTableScores] occurrences" occurrences
667 t2 <- getTime
668 liftBase $ do
669 let ngrams_terms = table ^.. each . ne_ngrams
670 -- printDebug "ngrams_terms" ngrams_terms
671 hprint stderr
672 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
673 (length ngrams_terms) t1 t2
674 let
675 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
676
677 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
678
679 pure $ table & each %~ setOcc
680
681
682
683
684 scoresRecomputeTableNgrams :: forall env err m.
685 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
686 => NodeId -> TabType -> ListId -> m Int
687 scoresRecomputeTableNgrams nId tabType listId = do
688 tableMap <- getNgramsTableMap listId ngramsType
689 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
690 . Map.mapWithKey ngramsElementFromRepo
691
692 pure $ 1
693 where
694 ngramsType = ngramsTypeFromTabType tabType
695
696
697 -- APIs
698
699 -- TODO: find a better place for the code above, All APIs stay here
700
701 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
702 deriving (Generic, Enum, Bounded, Read, Show)
703
704 instance FromHttpApiData OrderBy
705 where
706 parseUrlPiece "TermAsc" = pure TermAsc
707 parseUrlPiece "TermDesc" = pure TermDesc
708 parseUrlPiece "ScoreAsc" = pure ScoreAsc
709 parseUrlPiece "ScoreDesc" = pure ScoreDesc
710 parseUrlPiece _ = Left "Unexpected value of OrderBy"
711
712 instance ToHttpApiData OrderBy where
713 toUrlPiece = pack . show
714
715 instance ToParamSchema OrderBy
716 instance FromJSON OrderBy
717 instance ToJSON OrderBy
718 instance ToSchema OrderBy
719 instance Arbitrary OrderBy
720 where
721 arbitrary = elements [minBound..maxBound]
722
723 needsScores :: Maybe OrderBy -> Bool
724 needsScores (Just ScoreAsc) = True
725 needsScores (Just ScoreDesc) = True
726 needsScores _ = False
727
728 type TableNgramsApiGet = Summary " Table Ngrams API Get"
729 :> QueryParamR "ngramsType" TabType
730 :> QueryParamR "list" ListId
731 :> QueryParamR "limit" Limit
732 :> QueryParam "offset" Offset
733 :> QueryParam "listType" ListType
734 :> QueryParam "minTermSize" MinSize
735 :> QueryParam "maxTermSize" MaxSize
736 :> QueryParam "orderBy" OrderBy
737 :> QueryParam "search" Text
738 :> Get '[JSON] (VersionedWithCount NgramsTable)
739
740 type TableNgramsApiPut = Summary " Table Ngrams API Change"
741 :> QueryParamR "ngramsType" TabType
742 :> QueryParamR "list" ListId
743 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
744 :> Put '[JSON] (Versioned NgramsTablePatch)
745
746 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
747 :> QueryParamR "ngramsType" TabType
748 :> QueryParamR "list" ListId
749 :> "recompute" :> Post '[JSON] Int
750
751 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
752 :> QueryParamR "ngramsType" TabType
753 :> QueryParamR "list" ListId
754 :> Get '[JSON] Version
755
756 type TableNgramsApi = TableNgramsApiGet
757 :<|> TableNgramsApiPut
758 :<|> RecomputeScoresNgramsApiGet
759 :<|> "version" :> TableNgramsApiGetVersion
760 :<|> TableNgramsAsyncApi
761
762 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
763 :> "async"
764 :> "charts"
765 :> "update"
766 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
767
768 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
769 => NodeId
770 -> TabType
771 -> ListId
772 -> Limit
773 -> Maybe Offset
774 -> Maybe ListType
775 -> Maybe MinSize -> Maybe MaxSize
776 -> Maybe OrderBy
777 -> Maybe Text -- full text search
778 -> m (VersionedWithCount NgramsTable)
779 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
780 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
781 where
782 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
783
784
785
786 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
787 => NodeId
788 -> TabType
789 -> ListId
790 -> m Version
791 getTableNgramsVersion _nId _tabType listId = currentVersion listId
792
793
794
795 -- TODO: limit?
796 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
797 -- This line above looks like a waste of computation to finally get only the version.
798 -- See the comment about listNgramsChangedSince.
799
800
801 -- | Text search is deactivated for now for ngrams by doc only
802 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
803 => DocId -> TabType
804 -> ListId -> Limit -> Maybe Offset
805 -> Maybe ListType
806 -> Maybe MinSize -> Maybe MaxSize
807 -> Maybe OrderBy
808 -> Maybe Text -- full text search
809 -> m (VersionedWithCount NgramsTable)
810 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
811 ns <- selectNodesWithUsername NodeList userMaster
812 let ngramsType = ngramsTypeFromTabType tabType
813 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
814 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
815 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
816
817
818
819 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
820 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
821 :<|> tableNgramsPut
822 :<|> scoresRecomputeTableNgrams cId
823 :<|> getTableNgramsVersion cId
824 :<|> apiNgramsAsync cId
825
826 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
827 apiNgramsTableDoc dId = getTableNgramsDoc dId
828 :<|> tableNgramsPut
829 :<|> scoresRecomputeTableNgrams dId
830 :<|> getTableNgramsVersion dId
831 :<|> apiNgramsAsync dId
832
833 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
834 apiNgramsAsync _dId =
835 serveJobsAPI TableNgramsJob $ \i log ->
836 let
837 log' x = do
838 printDebug "tableNgramsPostChartsAsync" x
839 liftBase $ log x
840 in tableNgramsPostChartsAsync i log'
841
842 -- Did the given list of ngrams changed since the given version?
843 -- The returned value is versioned boolean value, meaning that one always retrieve the
844 -- latest version.
845 -- If the given version is negative then one simply receive the latest version and True.
846 -- Using this function is more precise than simply comparing the latest version number
847 -- with the local version number. Indeed there might be no change to this particular list
848 -- and still the version number has changed because of other lists.
849 --
850 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
851 -- * currentVersion: good computation, good bandwidth, bad precision.
852 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
853 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
854 listNgramsChangedSince :: HasNodeStory env err m
855 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
856 listNgramsChangedSince listId ngramsType version
857 | version < 0 =
858 Versioned <$> currentVersion listId <*> pure True
859 | otherwise =
860 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)