]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Add `NgramsSearchQuery` type
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex)
87 import Control.Monad.Reader
88 import Data.Foldable
89 import Data.Map.Strict (Map)
90 import Data.Maybe (fromMaybe)
91 import Data.Monoid
92 import Data.Ord (Down(..))
93 import Data.Patch.Class (Action(act), Transformable(..), ours)
94 import Data.Set (Set)
95 import Data.Text (Text, isInfixOf, toLower, unpack)
96 import Data.Text.Lazy.IO as DTL
97 import Formatting (hprint, int, (%))
98 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
99 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
100 import Gargantext.API.Admin.Types (HasSettings)
101 import Gargantext.API.Ngrams.Types
102 import Gargantext.API.Prelude
103 import Gargantext.Core.NodeStory
104 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasInvalidError, ContextId)
105 import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (CmdCommon)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
122 import System.IO (stderr)
123 import qualified Data.Aeson.Text as DAT
124 import qualified Data.List as List
125 import qualified Data.Map.Strict as Map
126 import qualified Data.Map.Strict.Patch as PM
127 import qualified Data.Set as Set
128 import qualified Gargantext.API.Metrics as Metrics
129 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
130
131 {-
132 -- TODO sequences of modifications (Patchs)
133 type NgramsIdPatch = Patch NgramsId NgramsPatch
134
135 ngramsPatch :: Int -> NgramsPatch
136 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
137
138 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
139 toEdit n p = Edit n p
140 ngramsIdPatch :: Patch NgramsId NgramsPatch
141 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
142 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
143 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
144 ]
145
146 -- applyPatchBack :: Patch -> IO Patch
147 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
148 -}
149 ------------------------------------------------------------------------
150 ------------------------------------------------------------------------
151 ------------------------------------------------------------------------
152
153 {-
154 -- TODO: Replace.old is ignored which means that if the current list
155 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
156 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
157 -- However this should not happen in non conflicting situations.
158 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
159 mkListsUpdate nt patches =
160 [ (ngramsTypeId nt, ng, listTypeId lt)
161 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
162 , lt <- patch ^.. patch_list . new
163 ]
164
165 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
166 -> NgramsType
167 -> NgramsTablePatch
168 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
169 mkChildrenGroups addOrRem nt patches =
170 [ (ngramsTypeId nt, parent, child)
171 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
172 , child <- patch ^.. patch_children . to addOrRem . folded
173 ]
174 -}
175
176 ------------------------------------------------------------------------
177
178 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
179 => m ()
180 saveNodeStory = do
181 saver <- view hasNodeStorySaver
182 liftBase $ do
183 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
184 saver
185 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
186
187
188 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
189 => m ()
190 saveNodeStoryImmediate = do
191 saver <- view hasNodeStoryImmediateSaver
192 liftBase $ do
193 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
194 saver
195 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
196
197 listTypeConflictResolution :: ListType -> ListType -> ListType
198 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
199
200
201 ngramsStatePatchConflictResolution
202 :: TableNgrams.NgramsType
203 -> NgramsTerm
204 -> ConflictResolutionNgramsPatch
205 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
206 = (ours, (const ours, ours), (False, False))
207 -- (False, False) mean here that Mod has always priority.
208 -- = (ours, (const ours, ours), (True, False))
209 -- (True, False) <- would mean priority to the left (same as ours).
210 -- undefined {- TODO think this through -}, listTypeConflictResolution)
211
212
213
214
215 -- Current state:
216 -- Insertions are not considered as patches,
217 -- they do not extend history,
218 -- they do not bump version.
219 insertNewOnly :: a -> Maybe b -> a
220 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
221 -- TODO error handling
222
223 {- unused
224 -- TODO refactor with putListNgrams
225 copyListNgrams :: RepoCmdM env err m
226 => NodeId -> NodeId -> NgramsType
227 -> m ()
228 copyListNgrams srcListId dstListId ngramsType = do
229 var <- view repoVar
230 liftBase $ modifyMVar_ var $
231 pure . (r_state . at ngramsType %~ (Just . f . something))
232 saveNodeStory
233 where
234 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
235 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
236
237 -- TODO refactor with putListNgrams
238 -- The list must be non-empty!
239 -- The added ngrams must be non-existent!
240 addListNgrams :: RepoCmdM env err m
241 => NodeId -> NgramsType
242 -> [NgramsElement] -> m ()
243 addListNgrams listId ngramsType nes = do
244 var <- view repoVar
245 liftBase $ modifyMVar_ var $
246 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
247 saveNodeStory
248 where
249 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
250 -}
251
252 -- | TODO: incr the Version number
253 -- && should use patch
254 -- UNSAFE
255
256 setListNgrams :: HasNodeStory env err m
257 => NodeId
258 -> TableNgrams.NgramsType
259 -> Map NgramsTerm NgramsRepoElement
260 -> m ()
261 setListNgrams listId ngramsType ns = do
262 -- printDebug "[setListNgrams]" (listId, ngramsType)
263 getter <- view hasNodeStory
264 var <- liftBase $ (getter ^. nse_getter) [listId]
265 liftBase $ modifyMVar_ var $
266 pure . ( unNodeStory
267 . at listId . _Just
268 . a_state
269 . at ngramsType
270 .~ Just ns
271 )
272 saveNodeStory
273
274
275 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
276 newNgramsFromNgramsStatePatch p =
277 [ text2ngrams (unNgramsTerm n)
278 | (n,np) <- p ^.. _PatchMap
279 -- . each . _PatchMap
280 . each . _NgramsTablePatch
281 . _PatchMap . ifolded . withIndex
282 , _ <- np ^.. patch_new . _Just
283 ]
284
285
286
287
288 commitStatePatch :: ( HasNodeStory env err m
289 , HasNodeStoryImmediateSaver env
290 , HasNodeArchiveStoryImmediateSaver env
291 , CmdCommon env )
292 => ListId
293 -> Versioned NgramsStatePatch'
294 -> m (Versioned NgramsStatePatch')
295 commitStatePatch listId (Versioned _p_version p) = do
296 -- printDebug "[commitStatePatch]" listId
297 var <- getNodeStoryVar [listId]
298 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
299 vq' <- liftBase $ modifyMVar var $ \ns -> do
300 let
301 a = ns ^. unNodeStory . at listId . _Just
302 -- apply patches from version p_version to a ^. a_version
303 -- TODO Check this
304 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
305 q = mconcat $ a ^. a_history
306
307 --printDebug "[commitStatePatch] transformWith" (p,q)
308 -- let tws s = case s of
309 -- (Mod p) -> "Mod"
310 -- _ -> "Rpl"
311 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
312
313 let
314 (p', q') = transformWith ngramsStatePatchConflictResolution p q
315 a' = a & a_version +~ 1
316 & a_state %~ act p'
317 & a_history %~ (p' :)
318
319 {-
320 -- Ideally we would like to check these properties. However:
321 -- * They should be checked only to debug the code. The client data
322 -- should be able to trigger these.
323 -- * What kind of error should they throw (we are in IO here)?
324 -- * Should we keep modifyMVar?
325 -- * Should we throw the validation in an Exception, catch it around
326 -- modifyMVar and throw it back as an Error?
327 assertValid $ transformable p q
328 assertValid $ applicable p' (r ^. r_state)
329 -}
330 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
331 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
332 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
333 , Versioned (a' ^. a_version) q'
334 )
335
336 -- NOTE Now is the only good time to save the archive history. We
337 -- have the handle to the MVar and we need to save its exact
338 -- snapshot. Node Story archive is a linear table, so it's only
339 -- couple of inserts, it shouldn't take long...
340
341 -- If we postponed saving the archive to the debounce action, we
342 -- would have issues like
343 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
344 -- where the `q` computation from above (which uses the archive)
345 -- would cause incorrect patch application (before the previous
346 -- archive was saved and applied)
347 newNs' <- archiveSaver $ fst newNs
348
349 pure (newNs', snd newNs)
350
351 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
352 saveNodeStory
353 --saveNodeStoryImmediate
354 -- Save new ngrams
355 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
356
357 pure vq'
358
359
360
361 -- This is a special case of tableNgramsPut where the input patch is empty.
362 tableNgramsPull :: HasNodeStory env err m
363 => ListId
364 -> TableNgrams.NgramsType
365 -> Version
366 -> m (Versioned NgramsTablePatch)
367 tableNgramsPull listId ngramsType p_version = do
368 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
369 var <- getNodeStoryVar [listId]
370 r <- liftBase $ readMVar var
371
372 let
373 a = r ^. unNodeStory . at listId . _Just
374 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
375 q_table = q ^. _PatchMap . at ngramsType . _Just
376
377 pure (Versioned (a ^. a_version) q_table)
378
379
380
381
382 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
383 -- Apply the given patch to the DB and returns the patch to be applied on the
384 -- client.
385 -- TODO-ACCESS check
386 tableNgramsPut :: ( HasNodeStory env err m
387 , HasNodeStoryImmediateSaver env
388 , HasNodeArchiveStoryImmediateSaver env
389 , HasInvalidError err
390 , CmdCommon env
391 )
392 => TabType
393 -> ListId
394 -> Versioned NgramsTablePatch
395 -> m (Versioned NgramsTablePatch)
396 tableNgramsPut tabType listId (Versioned p_version p_table)
397 | p_table == mempty = do
398 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
399 let ngramsType = ngramsTypeFromTabType tabType
400 tableNgramsPull listId ngramsType p_version
401
402 | otherwise = do
403 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
404 let ngramsType = ngramsTypeFromTabType tabType
405 (p, p_validity) = PM.singleton ngramsType p_table
406
407 assertValid p_validity
408
409 ret <- commitStatePatch listId (Versioned p_version p)
410 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
411
412 pure ret
413
414
415
416 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
417 , FlowCmdM env err m
418 , HasNodeError err
419 , HasSettings env
420 , MonadJobStatus m
421 )
422 => UpdateTableNgramsCharts
423 -> JobHandle m
424 -> m ()
425 tableNgramsPostChartsAsync utn jobHandle = do
426 let tabType = utn ^. utn_tab_type
427 let listId = utn ^. utn_list_id
428
429 node <- getNode listId
430 let _nId = node ^. node_id
431 _uId = node ^. node_user_id
432 mCId = node ^. node_parent_id
433
434 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
435 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
436
437 case mCId of
438 Nothing -> do
439 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
440 markStarted 1 jobHandle
441 markFailed Nothing jobHandle
442 Just cId -> do
443 case tabType of
444 Authors -> do
445 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
446 markStarted 1 jobHandle
447 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
448 markComplete jobHandle
449 Institutes -> do
450 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
451 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
452 markStarted 3 jobHandle
453 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
454 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
455 markProgress 1 jobHandle
456 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
457 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
458 markProgress 1 jobHandle
459 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
460 markComplete jobHandle
461 Sources -> do
462 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
463 markStarted 1 jobHandle
464 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
465 markComplete jobHandle
466 Terms -> do
467 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
468 markStarted 6 jobHandle
469 {-
470 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
471 logRefSuccess
472 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
473 logRefSuccess
474 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
475 logRefSuccess
476 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
477 logRefSuccess
478 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
479 logRefSuccess
480 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
481 -}
482 markComplete jobHandle
483 _ -> do
484 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
485 markStarted 1 jobHandle
486 markFailed Nothing jobHandle
487
488 {-
489 { _ne_list :: ListType
490 If we merge the parents/children we can potentially create cycles!
491 , _ne_parent :: Maybe NgramsTerm
492 , _ne_children :: MSet NgramsTerm
493 }
494 -}
495
496 getNgramsTableMap :: HasNodeStory env err m
497 => NodeId
498 -> TableNgrams.NgramsType
499 -> m (Versioned NgramsTableMap)
500 getNgramsTableMap nodeId ngramsType = do
501 v <- getNodeStoryVar [nodeId]
502 repo <- liftBase $ readMVar v
503 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
504 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
505
506
507 dumpJsonTableMap :: HasNodeStory env err m
508 => Text
509 -> NodeId
510 -> TableNgrams.NgramsType
511 -> m ()
512 dumpJsonTableMap fpath nodeId ngramsType = do
513 m <- getNgramsTableMap nodeId ngramsType
514 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
515 pure ()
516
517
518 -- | TODO Errors management
519 -- TODO: polymorphic for Annuaire or Corpus or ...
520 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
521 -- TODO: should take only one ListId
522
523
524 getTableNgrams :: forall env err m.
525 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
526 => NodeType -> NodeId -> TabType
527 -> ListId -> Limit -> Maybe Offset
528 -> Maybe ListType
529 -> Maybe MinSize -> Maybe MaxSize
530 -> Maybe OrderBy
531 -> (NgramsTerm -> Bool)
532 -> m (VersionedWithCount NgramsTable)
533 getTableNgrams _nType nId tabType listId limit_ offset
534 listType minSize maxSize orderBy searchQuery = do
535
536 t0 <- getTime
537 -- lIds <- selectNodesWithUsername NodeList userMaster
538 let
539 ngramsType = ngramsTypeFromTabType tabType
540 offset' = getOffset $ maybe 0 identity offset
541 listType' = maybe (const True) (==) listType
542 minSize' = maybe (const True) (<=) (getMinSize <$> minSize)
543 maxSize' = maybe (const True) (>=) (getMaxSize <$> maxSize)
544
545 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
546 (tableMap ^. at r)
547 )
548 (ne ^. ne_root)
549
550 selected_node n = minSize' s
551 && maxSize' s
552 && searchQuery (n ^. ne_ngrams)
553 && listType' (n ^. ne_list)
554 where
555 s = n ^. ne_size
556
557 ---------------------------------------
558 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
559 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
560 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
561 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
562 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
563
564 ---------------------------------------
565 -- | Filter the given `tableMap` with the search criteria.
566 filteredNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
567 filteredNodes tableMap = roots
568 where
569 list = Set.fromList $ Map.elems tableMap
570 selected_nodes = list & Set.filter selected_node
571 roots = Set.map (rootOf tableMap) selected_nodes
572
573 -- | For each input root, extends its occurrence count with
574 -- the information found in the subitems.
575 withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
576 withInners tableMap roots = Set.map addSubitemsOccurrences roots
577 where
578 addSubitemsOccurrences :: NgramsElement -> NgramsElement
579 addSubitemsOccurrences e =
580 e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
581
582 alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
583 alterOccurrences occs t = case Map.lookup t tableMap of
584 Nothing -> occs
585 Just e' -> occs <> e' ^. ne_occurrences
586
587 -- | Paginate the results
588 sortAndPaginate :: Set NgramsElement -> [NgramsElement]
589 sortAndPaginate = take (getLimit limit_)
590 . drop offset'
591 . sortOnOrder orderBy
592 . Set.toList
593
594 ---------------------------------------
595
596 let scoresNeeded = needsScores orderBy
597 t1 <- getTime
598
599 versionedTableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
600
601 let tableMap = versionedTableMap ^. v_data
602 let filteredData = filteredNodes tableMap
603
604 let fltrCount = Set.size filteredData
605
606 t2 <- getTime
607 let tableMapSorted = versionedTableMap
608 & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
609 t3 <- getTime
610 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
611 liftBase $ do
612 hprint stderr
613 ("getTableNgrams total=" % hasTime
614 % " map1=" % hasTime
615 % " map2=" % hasTime
616 % " map3=" % hasTime
617 % " sql=" % (if scoresNeeded then "map2" else "map3")
618 % "\n"
619 ) t0 t3 t0 t1 t1 t2 t2 t3
620
621 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
622 pure $ toVersionedWithCount fltrCount tableMapSorted
623
624
625 -- | Helper function to get the ngrams table with scores.
626 getNgramsTable' :: forall env err m.
627 ( HasNodeStory env err m
628 , HasNodeError err
629 , CmdCommon env)
630 => NodeId
631 -> ListId
632 -> TableNgrams.NgramsType
633 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
634 getNgramsTable' nId listId ngramsType = do
635 tableMap <- getNgramsTableMap listId ngramsType
636 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
637 . Map.mapWithKey ngramsElementFromRepo
638
639 -- | Helper function to set scores on an `NgramsTable`.
640 setNgramsTableScores :: forall env err m t.
641 ( Each t t NgramsElement NgramsElement
642 , HasNodeStory env err m
643 , HasNodeError err
644 , CmdCommon env )
645 => NodeId
646 -> ListId
647 -> TableNgrams.NgramsType
648 -> t
649 -> m t
650 setNgramsTableScores nId listId ngramsType table = do
651 t1 <- getTime
652 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
653 --printDebug "[setNgramsTableScores] occurrences" occurrences
654 t2 <- getTime
655 liftBase $ do
656 let ngrams_terms = table ^.. each . ne_ngrams
657 -- printDebug "ngrams_terms" ngrams_terms
658 hprint stderr
659 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
660 (length ngrams_terms) t1 t2
661 let
662 setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences)
663
664 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
665
666 pure $ table & each %~ setOcc
667
668
669
670
671 scoresRecomputeTableNgrams :: forall env err m.
672 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
673 => NodeId -> TabType -> ListId -> m Int
674 scoresRecomputeTableNgrams nId tabType listId = do
675 tableMap <- getNgramsTableMap listId ngramsType
676 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
677 . Map.mapWithKey ngramsElementFromRepo
678
679 pure $ 1
680 where
681 ngramsType = ngramsTypeFromTabType tabType
682
683
684 -- APIs
685
686 -- TODO: find a better place for the code above, All APIs stay here
687
688 needsScores :: Maybe OrderBy -> Bool
689 needsScores (Just ScoreAsc) = True
690 needsScores (Just ScoreDesc) = True
691 needsScores _ = False
692
693 type TableNgramsApiGet = Summary " Table Ngrams API Get"
694 :> QueryParamR "ngramsType" TabType
695 :> QueryParamR "list" ListId
696 :> QueryParamR "limit" Limit
697 :> QueryParam "offset" Offset
698 :> QueryParam "listType" ListType
699 :> QueryParam "minTermSize" MinSize
700 :> QueryParam "maxTermSize" MaxSize
701 :> QueryParam "orderBy" OrderBy
702 :> QueryParam "search" Text
703 :> Get '[JSON] (VersionedWithCount NgramsTable)
704
705 type TableNgramsApiPut = Summary " Table Ngrams API Change"
706 :> QueryParamR "ngramsType" TabType
707 :> QueryParamR "list" ListId
708 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
709 :> Put '[JSON] (Versioned NgramsTablePatch)
710
711 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
712 :> QueryParamR "ngramsType" TabType
713 :> QueryParamR "list" ListId
714 :> "recompute" :> Post '[JSON] Int
715
716 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
717 :> QueryParamR "ngramsType" TabType
718 :> QueryParamR "list" ListId
719 :> Get '[JSON] Version
720
721 type TableNgramsApi = TableNgramsApiGet
722 :<|> TableNgramsApiPut
723 :<|> RecomputeScoresNgramsApiGet
724 :<|> "version" :> TableNgramsApiGetVersion
725 :<|> TableNgramsAsyncApi
726
727 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
728 :> "async"
729 :> "charts"
730 :> "update"
731 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
732
733 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
734 => NodeId
735 -> TabType
736 -> ListId
737 -> Limit
738 -> Maybe Offset
739 -> Maybe ListType
740 -> Maybe MinSize -> Maybe MaxSize
741 -> Maybe OrderBy
742 -> Maybe Text -- full text search
743 -> m (VersionedWithCount NgramsTable)
744 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
745 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
746 where
747 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
748
749
750
751 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
752 => NodeId
753 -> TabType
754 -> ListId
755 -> m Version
756 getTableNgramsVersion _nId _tabType listId = currentVersion listId
757
758
759
760 -- TODO: limit?
761 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
762 -- This line above looks like a waste of computation to finally get only the version.
763 -- See the comment about listNgramsChangedSince.
764
765
766 -- | Text search is deactivated for now for ngrams by doc only
767 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
768 => DocId -> TabType
769 -> ListId -> Limit -> Maybe Offset
770 -> Maybe ListType
771 -> Maybe MinSize -> Maybe MaxSize
772 -> Maybe OrderBy
773 -> Maybe Text -- full text search
774 -> m (VersionedWithCount NgramsTable)
775 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
776 ns <- selectNodesWithUsername NodeList userMaster
777 let ngramsType = ngramsTypeFromTabType tabType
778 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
779 let searchQuery (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt
780 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
781
782
783
784 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
785 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
786 :<|> tableNgramsPut
787 :<|> scoresRecomputeTableNgrams cId
788 :<|> getTableNgramsVersion cId
789 :<|> apiNgramsAsync cId
790
791 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
792 apiNgramsTableDoc dId = getTableNgramsDoc dId
793 :<|> tableNgramsPut
794 :<|> scoresRecomputeTableNgrams dId
795 :<|> getTableNgramsVersion dId
796 :<|> apiNgramsAsync dId
797
798 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
799 apiNgramsAsync _dId =
800 serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
801 \jHandle' -> tableNgramsPostChartsAsync i jHandle'
802
803 -- Did the given list of ngrams changed since the given version?
804 -- The returned value is versioned boolean value, meaning that one always retrieve the
805 -- latest version.
806 -- If the given version is negative then one simply receive the latest version and True.
807 -- Using this function is more precise than simply comparing the latest version number
808 -- with the local version number. Indeed there might be no change to this particular list
809 -- and still the version number has changed because of other lists.
810 --
811 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
812 -- * currentVersion: good computation, good bandwidth, bad precision.
813 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
814 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
815 listNgramsChangedSince :: HasNodeStory env err m
816 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
817 listNgramsChangedSince listId ngramsType version
818 | version < 0 =
819 Versioned <$> currentVersion listId <*> pure True
820 | otherwise =
821 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)