]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev' into dev-openalex
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , searchTableNgrams
32 , getTableNgrams
33 , getTableNgramsCorpus
34 , setListNgrams
35 --, rmListNgrams TODO fix before exporting
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsTablePatch
40 , NgramsTableMap
41
42 , NgramsTerm(..)
43
44 , NgramsElement(..)
45 , mkNgramsElement
46
47 , RootParent(..)
48
49 , MSet
50 , mSetFromList
51 , mSetToList
52
53 , Repo(..)
54 , r_version
55 , r_state
56 , r_history
57 , NgramsRepoElement(..)
58 , saveNodeStory
59 , saveNodeStoryImmediate
60 , initRepo
61
62 , TabType(..)
63
64 , QueryParamR
65 , TODO
66
67 -- Internals
68 , getNgramsTableMap
69 , dumpJsonTableMap
70 , tableNgramsPull
71 , tableNgramsPut
72
73 , getNgramsTable'
74 , setNgramsTableScores
75
76 , Version
77 , Versioned(..)
78 , VersionedWithCount(..)
79 , currentVersion
80 , listNgramsChangedSince
81 , MinSize, MaxSize, OrderBy, NgramsTable
82 , UpdateTableNgramsCharts
83 )
84 where
85
86 import Control.Concurrent
87 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
88 import Control.Monad.Reader
89 import Data.Foldable
90 import Data.Map.Strict (Map)
91 import Data.Maybe (fromMaybe)
92 import Data.Monoid
93 import Data.Ord (Down(..))
94 import Data.Patch.Class (Action(act), Transformable(..), ours)
95 import Data.Set (Set)
96 import Data.Text (Text, isInfixOf, toLower, unpack)
97 import Data.Text.Lazy.IO as DTL
98 import Formatting (hprint, int, (%))
99 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
100 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
101 import Gargantext.API.Admin.Types (HasSettings)
102 import Gargantext.API.Ngrams.Types
103 import Gargantext.API.Prelude
104 import Gargantext.Core.NodeStory
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasInvalidError, ContextId)
106 import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
107 import Gargantext.API.Ngrams.Tools
108 import Gargantext.Database.Action.Flow.Types
109 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
110 import Gargantext.Database.Admin.Config (userMaster)
111 import Gargantext.Database.Admin.Types.Node (NodeType(..))
112 import Gargantext.Database.Prelude (CmdCommon)
113 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
114 import Gargantext.Database.Query.Table.Node (getNode)
115 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
116 import Gargantext.Database.Query.Table.Node.Select
117 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
118 import Gargantext.Prelude hiding (log)
119 import Gargantext.Prelude.Clock (hasTime, getTime)
120 import Prelude (error)
121 import Servant hiding (Patch)
122 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
123 import System.IO (stderr)
124 import qualified Data.Aeson.Text as DAT
125 import qualified Data.List as List
126 import qualified Data.Map.Strict as Map
127 import qualified Data.Map.Strict.Patch as PM
128 import qualified Data.Set as Set
129 import qualified Gargantext.API.Metrics as Metrics
130 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
131
132 {-
133 -- TODO sequences of modifications (Patchs)
134 type NgramsIdPatch = Patch NgramsId NgramsPatch
135
136 ngramsPatch :: Int -> NgramsPatch
137 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
138
139 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
140 toEdit n p = Edit n p
141 ngramsIdPatch :: Patch NgramsId NgramsPatch
142 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
143 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
144 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
145 ]
146
147 -- applyPatchBack :: Patch -> IO Patch
148 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
149 -}
150 ------------------------------------------------------------------------
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
153
154 {-
155 -- TODO: Replace.old is ignored which means that if the current list
156 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
157 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
158 -- However this should not happen in non conflicting situations.
159 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
160 mkListsUpdate nt patches =
161 [ (ngramsTypeId nt, ng, listTypeId lt)
162 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
163 , lt <- patch ^.. patch_list . new
164 ]
165
166 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
167 -> NgramsType
168 -> NgramsTablePatch
169 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
170 mkChildrenGroups addOrRem nt patches =
171 [ (ngramsTypeId nt, parent, child)
172 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , child <- patch ^.. patch_children . to addOrRem . folded
174 ]
175 -}
176
177 ------------------------------------------------------------------------
178
179 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
180 => m ()
181 saveNodeStory = do
182 saver <- view hasNodeStorySaver
183 liftBase $ do
184 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
185 saver
186 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
187
188
189 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
190 => m ()
191 saveNodeStoryImmediate = do
192 saver <- view hasNodeStoryImmediateSaver
193 liftBase $ do
194 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
195 saver
196 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
197
198 listTypeConflictResolution :: ListType -> ListType -> ListType
199 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
200
201
202 ngramsStatePatchConflictResolution
203 :: TableNgrams.NgramsType
204 -> NgramsTerm
205 -> ConflictResolutionNgramsPatch
206 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
207 = (ours, (const ours, ours), (False, False))
208 -- (False, False) mean here that Mod has always priority.
209 -- = (ours, (const ours, ours), (True, False))
210 -- (True, False) <- would mean priority to the left (same as ours).
211 -- undefined {- TODO think this through -}, listTypeConflictResolution)
212
213
214
215
216 -- Current state:
217 -- Insertions are not considered as patches,
218 -- they do not extend history,
219 -- they do not bump version.
220 insertNewOnly :: a -> Maybe b -> a
221 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
222 -- TODO error handling
223
224 {- unused
225 -- TODO refactor with putListNgrams
226 copyListNgrams :: RepoCmdM env err m
227 => NodeId -> NodeId -> NgramsType
228 -> m ()
229 copyListNgrams srcListId dstListId ngramsType = do
230 var <- view repoVar
231 liftBase $ modifyMVar_ var $
232 pure . (r_state . at ngramsType %~ (Just . f . something))
233 saveNodeStory
234 where
235 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
236 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
237
238 -- TODO refactor with putListNgrams
239 -- The list must be non-empty!
240 -- The added ngrams must be non-existent!
241 addListNgrams :: RepoCmdM env err m
242 => NodeId -> NgramsType
243 -> [NgramsElement] -> m ()
244 addListNgrams listId ngramsType nes = do
245 var <- view repoVar
246 liftBase $ modifyMVar_ var $
247 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
248 saveNodeStory
249 where
250 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
251 -}
252
253 -- | TODO: incr the Version number
254 -- && should use patch
255 -- UNSAFE
256
257 setListNgrams :: HasNodeStory env err m
258 => NodeId
259 -> TableNgrams.NgramsType
260 -> Map NgramsTerm NgramsRepoElement
261 -> m ()
262 setListNgrams listId ngramsType ns = do
263 -- printDebug "[setListNgrams]" (listId, ngramsType)
264 getter <- view hasNodeStory
265 var <- liftBase $ (getter ^. nse_getter) [listId]
266 liftBase $ modifyMVar_ var $
267 pure . ( unNodeStory
268 . at listId . _Just
269 . a_state
270 . at ngramsType
271 .~ Just ns
272 )
273 saveNodeStory
274
275
276 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
277 newNgramsFromNgramsStatePatch p =
278 [ text2ngrams (unNgramsTerm n)
279 | (n,np) <- p ^.. _PatchMap
280 -- . each . _PatchMap
281 . each . _NgramsTablePatch
282 . _PatchMap . ifolded . withIndex
283 , _ <- np ^.. patch_new . _Just
284 ]
285
286
287
288
289 commitStatePatch :: ( HasNodeStory env err m
290 , HasNodeStoryImmediateSaver env
291 , HasNodeArchiveStoryImmediateSaver env
292 , CmdCommon env )
293 => ListId
294 -> Versioned NgramsStatePatch'
295 -> m (Versioned NgramsStatePatch')
296 commitStatePatch listId (Versioned _p_version p) = do
297 -- printDebug "[commitStatePatch]" listId
298 var <- getNodeStoryVar [listId]
299 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
300 vq' <- liftBase $ modifyMVar var $ \ns -> do
301 let
302 a = ns ^. unNodeStory . at listId . _Just
303 -- apply patches from version p_version to a ^. a_version
304 -- TODO Check this
305 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
306 q = mconcat $ a ^. a_history
307
308 --printDebug "[commitStatePatch] transformWith" (p,q)
309 -- let tws s = case s of
310 -- (Mod p) -> "Mod"
311 -- _ -> "Rpl"
312 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
313
314 let
315 (p', q') = transformWith ngramsStatePatchConflictResolution p q
316 a' = a & a_version +~ 1
317 & a_state %~ act p'
318 & a_history %~ (p' :)
319
320 {-
321 -- Ideally we would like to check these properties. However:
322 -- * They should be checked only to debug the code. The client data
323 -- should be able to trigger these.
324 -- * What kind of error should they throw (we are in IO here)?
325 -- * Should we keep modifyMVar?
326 -- * Should we throw the validation in an Exception, catch it around
327 -- modifyMVar and throw it back as an Error?
328 assertValid $ transformable p q
329 assertValid $ applicable p' (r ^. r_state)
330 -}
331 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
332 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
333 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
334 , Versioned (a' ^. a_version) q'
335 )
336
337 -- NOTE Now is the only good time to save the archive history. We
338 -- have the handle to the MVar and we need to save its exact
339 -- snapshot. Node Story archive is a linear table, so it's only
340 -- couple of inserts, it shouldn't take long...
341
342 -- If we postponed saving the archive to the debounce action, we
343 -- would have issues like
344 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
345 -- where the `q` computation from above (which uses the archive)
346 -- would cause incorrect patch application (before the previous
347 -- archive was saved and applied)
348 newNs' <- archiveSaver $ fst newNs
349
350 pure (newNs', snd newNs)
351
352 -- Save new ngrams
353 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
354 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
355 -- saveNodeStory
356 saveNodeStoryImmediate
357
358 pure vq'
359
360
361
362 -- This is a special case of tableNgramsPut where the input patch is empty.
363 tableNgramsPull :: HasNodeStory env err m
364 => ListId
365 -> TableNgrams.NgramsType
366 -> Version
367 -> m (Versioned NgramsTablePatch)
368 tableNgramsPull listId ngramsType p_version = do
369 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
370 var <- getNodeStoryVar [listId]
371 r <- liftBase $ readMVar var
372
373 let
374 a = r ^. unNodeStory . at listId . _Just
375 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
376 q_table = q ^. _PatchMap . at ngramsType . _Just
377
378 pure (Versioned (a ^. a_version) q_table)
379
380
381
382
383 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
384 -- Apply the given patch to the DB and returns the patch to be applied on the
385 -- client.
386 -- TODO-ACCESS check
387 tableNgramsPut :: ( HasNodeStory env err m
388 , HasNodeStoryImmediateSaver env
389 , HasNodeArchiveStoryImmediateSaver env
390 , HasInvalidError err
391 , CmdCommon env
392 )
393 => TabType
394 -> ListId
395 -> Versioned NgramsTablePatch
396 -> m (Versioned NgramsTablePatch)
397 tableNgramsPut tabType listId (Versioned p_version p_table)
398 | p_table == mempty = do
399 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
400 let ngramsType = ngramsTypeFromTabType tabType
401 tableNgramsPull listId ngramsType p_version
402
403 | otherwise = do
404 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
405 let ngramsType = ngramsTypeFromTabType tabType
406 (p, p_validity) = PM.singleton ngramsType p_table
407
408 assertValid p_validity
409
410 ret <- commitStatePatch listId (Versioned p_version p)
411 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
412
413 pure ret
414
415
416
417 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
418 , FlowCmdM env err m
419 , HasNodeError err
420 , HasSettings env
421 , MonadJobStatus m
422 )
423 => UpdateTableNgramsCharts
424 -> JobHandle m
425 -> m ()
426 tableNgramsPostChartsAsync utn jobHandle = do
427 let tabType = utn ^. utn_tab_type
428 let listId = utn ^. utn_list_id
429
430 node <- getNode listId
431 let _nId = node ^. node_id
432 _uId = node ^. node_user_id
433 mCId = node ^. node_parent_id
434
435 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
436 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
437
438 case mCId of
439 Nothing -> do
440 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
441 markStarted 1 jobHandle
442 markFailed Nothing jobHandle
443 Just cId -> do
444 case tabType of
445 Authors -> do
446 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
447 markStarted 1 jobHandle
448 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
449 markComplete jobHandle
450 Institutes -> do
451 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
452 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
453 markStarted 3 jobHandle
454 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
455 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
456 markProgress 1 jobHandle
457 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
458 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
459 markProgress 1 jobHandle
460 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
461 markComplete jobHandle
462 Sources -> do
463 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
464 markStarted 1 jobHandle
465 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
466 markComplete jobHandle
467 Terms -> do
468 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
469 markStarted 6 jobHandle
470 {-
471 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
472 logRefSuccess
473 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
474 logRefSuccess
475 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
476 logRefSuccess
477 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
478 logRefSuccess
479 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
480 logRefSuccess
481 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
482 -}
483 markComplete jobHandle
484 _ -> do
485 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
486 markStarted 1 jobHandle
487 markFailed Nothing jobHandle
488
489 {-
490 { _ne_list :: ListType
491 If we merge the parents/children we can potentially create cycles!
492 , _ne_parent :: Maybe NgramsTerm
493 , _ne_children :: MSet NgramsTerm
494 }
495 -}
496
497 getNgramsTableMap :: HasNodeStory env err m
498 => NodeId
499 -> TableNgrams.NgramsType
500 -> m (Versioned NgramsTableMap)
501 getNgramsTableMap nodeId ngramsType = do
502 v <- getNodeStoryVar [nodeId]
503 repo <- liftBase $ readMVar v
504 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
505 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
506
507
508 dumpJsonTableMap :: HasNodeStory env err m
509 => Text
510 -> NodeId
511 -> TableNgrams.NgramsType
512 -> m ()
513 dumpJsonTableMap fpath nodeId ngramsType = do
514 m <- getNgramsTableMap nodeId ngramsType
515 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
516 pure ()
517
518
519 -- | TODO Errors management
520 -- TODO: polymorphic for Annuaire or Corpus or ...
521 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
522 -- TODO: should take only one ListId
523
524 -- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
525 -- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
526 -- sorted according to the input 'NgramsSearchQuery', together with the
527 -- occurrences of the elements.
528 searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
529 -> NgramsSearchQuery
530 -- ^ The search query on the retrieved data
531 -> VersionedWithCount NgramsTable
532 searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
533 let tableMap = versionedTableMap ^. v_data
534 filteredData = filterNodes tableMap
535 tableMapSorted = versionedTableMap
536 & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
537
538 in toVersionedWithCount (Set.size filteredData) tableMapSorted
539 where
540
541 -- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
542 -- 'NgramsElement' itself, if no root can be found.
543 -- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
544 -- filtering for, in which case we have to change its type to match, if needed.
545 rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
546 rootOf tblMap ne = case ne ^. ne_root of
547 Nothing -> ne
548 Just rootKey
549 | Just r <- tblMap ^. at rootKey
550 -- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
551 -- we override the type or we filter out the node altogether?
552 -> over ne_list (\oldList -> fromMaybe oldList _nsq_listType) r
553 | otherwise
554 -> ne
555
556 -- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
557 -- mandated by 'NgramsSearchQuery'.
558 matchingNode :: NgramsElement -> Bool
559 matchingNode inputNode =
560 let nodeSize = inputNode ^. ne_size
561 matchesListType = maybe (const True) (==) _nsq_listType
562 respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
563 respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
564
565 in respectsMinSize nodeSize
566 && respectsMaxSize nodeSize
567 && _nsq_searchQuery (inputNode ^. ne_ngrams)
568 && matchesListType (inputNode ^. ne_list)
569
570 sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
571 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
572 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
573 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
574 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
575 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
576
577 -- | Filters the given `tableMap` with the search criteria. It returns
578 -- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
579 filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
580 filterNodes tblMap = Set.map (rootOf tblMap) selectedNodes
581 where
582 allNodes = Set.fromList $ Map.elems tblMap
583 selectedNodes = Set.filter matchingNode allNodes
584
585 -- | For each input root, extends its occurrence count with
586 -- the information found in the subitems.
587 withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
588 withInners tblMap roots = Set.map addSubitemsOccurrences roots
589 where
590 addSubitemsOccurrences :: NgramsElement -> NgramsElement
591 addSubitemsOccurrences e =
592 e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
593
594 alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
595 alterOccurrences occs t = case Map.lookup t tblMap of
596 Nothing -> occs
597 Just e' -> occs <> e' ^. ne_occurrences
598
599 -- | Paginate the results
600 sortAndPaginate :: Set NgramsElement -> [NgramsElement]
601 sortAndPaginate xs =
602 let offset' = getOffset $ maybe 0 identity _nsq_offset
603 in take (getLimit _nsq_limit)
604 . drop offset'
605 . sortOnOrder _nsq_orderBy
606 . Set.toList
607 $ xs
608
609
610 getTableNgrams :: forall env err m.
611 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
612 => NodeId
613 -> ListId
614 -> TabType
615 -> NgramsSearchQuery
616 -> m (VersionedWithCount NgramsTable)
617 getTableNgrams nodeId listId tabType searchQuery = do
618 let ngramsType = ngramsTypeFromTabType tabType
619 versionedInput <- getNgramsTable' nodeId listId ngramsType
620 pure $ searchTableNgrams versionedInput searchQuery
621
622
623 -- | Helper function to get the ngrams table with scores.
624 getNgramsTable' :: forall env err m.
625 ( HasNodeStory env err m
626 , HasNodeError err
627 , CmdCommon env)
628 => NodeId
629 -> ListId
630 -> TableNgrams.NgramsType
631 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
632 getNgramsTable' nId listId ngramsType = do
633 tableMap <- getNgramsTableMap listId ngramsType
634 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
635 . Map.mapWithKey ngramsElementFromRepo
636
637 -- | Helper function to set scores on an `NgramsTable`.
638 setNgramsTableScores :: forall env err m t.
639 ( Each t t NgramsElement NgramsElement
640 , HasNodeStory env err m
641 , HasNodeError err
642 , CmdCommon env )
643 => NodeId
644 -> ListId
645 -> TableNgrams.NgramsType
646 -> t
647 -> m t
648 setNgramsTableScores nId listId ngramsType table = do
649 t1 <- getTime
650 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
651 --printDebug "[setNgramsTableScores] occurrences" occurrences
652 t2 <- getTime
653 liftBase $ do
654 let ngrams_terms = table ^.. each . ne_ngrams
655 -- printDebug "ngrams_terms" ngrams_terms
656 hprint stderr
657 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
658 (length ngrams_terms) t1 t2
659 let
660 setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences)
661
662 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
663
664 pure $ table & each %~ setOcc
665
666
667
668
669 scoresRecomputeTableNgrams :: forall env err m.
670 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
671 => NodeId -> TabType -> ListId -> m Int
672 scoresRecomputeTableNgrams nId tabType listId = do
673 tableMap <- getNgramsTableMap listId ngramsType
674 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
675 . Map.mapWithKey ngramsElementFromRepo
676
677 pure $ 1
678 where
679 ngramsType = ngramsTypeFromTabType tabType
680
681
682 -- APIs
683
684 -- TODO: find a better place for the code above, All APIs stay here
685
686 needsScores :: Maybe OrderBy -> Bool
687 needsScores (Just ScoreAsc) = True
688 needsScores (Just ScoreDesc) = True
689 needsScores _ = False
690
691 type TableNgramsApiGet = Summary " Table Ngrams API Get"
692 :> QueryParamR "ngramsType" TabType
693 :> QueryParamR "list" ListId
694 :> QueryParamR "limit" Limit
695 :> QueryParam "offset" Offset
696 :> QueryParam "listType" ListType
697 :> QueryParam "minTermSize" MinSize
698 :> QueryParam "maxTermSize" MaxSize
699 :> QueryParam "orderBy" OrderBy
700 :> QueryParam "search" Text
701 :> Get '[JSON] (VersionedWithCount NgramsTable)
702
703 type TableNgramsApiPut = Summary " Table Ngrams API Change"
704 :> QueryParamR "ngramsType" TabType
705 :> QueryParamR "list" ListId
706 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
707 :> Put '[JSON] (Versioned NgramsTablePatch)
708
709 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
710 :> QueryParamR "ngramsType" TabType
711 :> QueryParamR "list" ListId
712 :> "recompute" :> Post '[JSON] Int
713
714 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
715 :> QueryParamR "ngramsType" TabType
716 :> QueryParamR "list" ListId
717 :> Get '[JSON] Version
718
719 type TableNgramsApi = TableNgramsApiGet
720 :<|> TableNgramsApiPut
721 :<|> RecomputeScoresNgramsApiGet
722 :<|> "version" :> TableNgramsApiGetVersion
723 :<|> TableNgramsAsyncApi
724
725 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
726 :> "async"
727 :> "charts"
728 :> "update"
729 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
730
731 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
732 => NodeId
733 -> TabType
734 -> ListId
735 -> Limit
736 -> Maybe Offset
737 -> Maybe ListType
738 -> Maybe MinSize -> Maybe MaxSize
739 -> Maybe OrderBy
740 -> Maybe Text -- full text search
741 -> m (VersionedWithCount NgramsTable)
742 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
743 getTableNgrams nId listId tabType searchQuery
744 where
745 searchQueryFn (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
746 searchQuery = NgramsSearchQuery {
747 _nsq_limit = limit_
748 , _nsq_offset = offset
749 , _nsq_listType = listType
750 , _nsq_minSize = minSize
751 , _nsq_maxSize = maxSize
752 , _nsq_orderBy = orderBy
753 , _nsq_searchQuery = searchQueryFn
754 }
755
756
757
758
759 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
760 => NodeId
761 -> TabType
762 -> ListId
763 -> m Version
764 getTableNgramsVersion _nId _tabType listId = currentVersion listId
765
766
767
768 -- TODO: limit?
769 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
770 -- This line above looks like a waste of computation to finally get only the version.
771 -- See the comment about listNgramsChangedSince.
772
773
774 -- | Text search is deactivated for now for ngrams by doc only
775 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
776 => DocId -> TabType
777 -> ListId -> Limit -> Maybe Offset
778 -> Maybe ListType
779 -> Maybe MinSize -> Maybe MaxSize
780 -> Maybe OrderBy
781 -> Maybe Text -- full text search
782 -> m (VersionedWithCount NgramsTable)
783 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
784 ns <- selectNodesWithUsername NodeList userMaster
785 let ngramsType = ngramsTypeFromTabType tabType
786 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
787 let searchQueryFn (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt
788 searchQuery = NgramsSearchQuery {
789 _nsq_limit = limit_
790 , _nsq_offset = offset
791 , _nsq_listType = listType
792 , _nsq_minSize = minSize
793 , _nsq_maxSize = maxSize
794 , _nsq_orderBy = orderBy
795 , _nsq_searchQuery = searchQueryFn
796 }
797 getTableNgrams dId listId tabType searchQuery
798
799
800 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
801 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
802 :<|> tableNgramsPut
803 :<|> scoresRecomputeTableNgrams cId
804 :<|> getTableNgramsVersion cId
805 :<|> apiNgramsAsync cId
806
807 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
808 apiNgramsTableDoc dId = getTableNgramsDoc dId
809 :<|> tableNgramsPut
810 :<|> scoresRecomputeTableNgrams dId
811 :<|> getTableNgramsVersion dId
812 :<|> apiNgramsAsync dId
813
814 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
815 apiNgramsAsync _dId =
816 serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
817 \jHandle' -> tableNgramsPostChartsAsync i jHandle'
818
819 -- Did the given list of ngrams changed since the given version?
820 -- The returned value is versioned boolean value, meaning that one always retrieve the
821 -- latest version.
822 -- If the given version is negative then one simply receive the latest version and True.
823 -- Using this function is more precise than simply comparing the latest version number
824 -- with the local version number. Indeed there might be no change to this particular list
825 -- and still the version number has changed because of other lists.
826 --
827 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
828 -- * currentVersion: good computation, good bandwidth, bad precision.
829 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
830 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
831 listNgramsChangedSince :: HasNodeStory env err m
832 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
833 listNgramsChangedSince listId ngramsType version
834 | version < 0 =
835 Versioned <$> currentVersion listId <*> pure True
836 | otherwise =
837 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)