]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge remote-tracking branch 'origin/dev-phyloDebug' into dev-phyloDebug
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
90 import Data.Foldable
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
93 import Data.Monoid
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Set (Set)
97 import Data.Swagger hiding (version, patch)
98 import Data.Text (Text, isInfixOf, toLower, unpack, pack)
99 import Data.Text.Lazy.IO as DTL
100 import Formatting (hprint, int, (%))
101 import GHC.Generics (Generic)
102 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
103 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
104 import Gargantext.API.Admin.Types (HasSettings)
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasInvalidError, ContextId)
109 import Gargantext.Core.Types.Query (Limit(..), Offset(..))
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Database.Action.Flow.Types
112 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
113 import Gargantext.Database.Admin.Config (userMaster)
114 import Gargantext.Database.Admin.Types.Node (NodeType(..))
115 import Gargantext.Database.Prelude (CmdCommon)
116 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
117 import Gargantext.Database.Query.Table.Node (getNode)
118 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
119 import Gargantext.Database.Query.Table.Node.Select
120 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
121 import Gargantext.Prelude hiding (log)
122 import Gargantext.Prelude.Clock (hasTime, getTime)
123 import Prelude (error)
124 import Servant hiding (Patch)
125 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
126 import System.IO (stderr)
127 import Test.QuickCheck (elements)
128 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
129 import qualified Data.Aeson.Text as DAT
130 import qualified Data.List as List
131 import qualified Data.Map.Strict as Map
132 import qualified Data.Map.Strict.Patch as PM
133 import qualified Data.Set as Set
134 import qualified Gargantext.API.Metrics as Metrics
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136
137 {-
138 -- TODO sequences of modifications (Patchs)
139 type NgramsIdPatch = Patch NgramsId NgramsPatch
140
141 ngramsPatch :: Int -> NgramsPatch
142 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
143
144 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
145 toEdit n p = Edit n p
146 ngramsIdPatch :: Patch NgramsId NgramsPatch
147 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
148 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
149 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
150 ]
151
152 -- applyPatchBack :: Patch -> IO Patch
153 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
154 -}
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158
159 {-
160 -- TODO: Replace.old is ignored which means that if the current list
161 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
162 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
163 -- However this should not happen in non conflicting situations.
164 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
165 mkListsUpdate nt patches =
166 [ (ngramsTypeId nt, ng, listTypeId lt)
167 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
168 , lt <- patch ^.. patch_list . new
169 ]
170
171 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
172 -> NgramsType
173 -> NgramsTablePatch
174 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
175 mkChildrenGroups addOrRem nt patches =
176 [ (ngramsTypeId nt, parent, child)
177 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
178 , child <- patch ^.. patch_children . to addOrRem . folded
179 ]
180 -}
181
182 ------------------------------------------------------------------------
183
184 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
185 => m ()
186 saveNodeStory = do
187 saver <- view hasNodeStorySaver
188 liftBase $ do
189 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
190 saver
191 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
192
193
194 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
195 => m ()
196 saveNodeStoryImmediate = do
197 saver <- view hasNodeStoryImmediateSaver
198 liftBase $ do
199 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
200 saver
201 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
202
203 listTypeConflictResolution :: ListType -> ListType -> ListType
204 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
205
206
207 ngramsStatePatchConflictResolution
208 :: TableNgrams.NgramsType
209 -> NgramsTerm
210 -> ConflictResolutionNgramsPatch
211 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
212 = (ours, (const ours, ours), (False, False))
213 -- (False, False) mean here that Mod has always priority.
214 -- = (ours, (const ours, ours), (True, False))
215 -- (True, False) <- would mean priority to the left (same as ours).
216 -- undefined {- TODO think this through -}, listTypeConflictResolution)
217
218
219
220
221 -- Current state:
222 -- Insertions are not considered as patches,
223 -- they do not extend history,
224 -- they do not bump version.
225 insertNewOnly :: a -> Maybe b -> a
226 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
227 -- TODO error handling
228
229 {- unused
230 -- TODO refactor with putListNgrams
231 copyListNgrams :: RepoCmdM env err m
232 => NodeId -> NodeId -> NgramsType
233 -> m ()
234 copyListNgrams srcListId dstListId ngramsType = do
235 var <- view repoVar
236 liftBase $ modifyMVar_ var $
237 pure . (r_state . at ngramsType %~ (Just . f . something))
238 saveNodeStory
239 where
240 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
241 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
242
243 -- TODO refactor with putListNgrams
244 -- The list must be non-empty!
245 -- The added ngrams must be non-existent!
246 addListNgrams :: RepoCmdM env err m
247 => NodeId -> NgramsType
248 -> [NgramsElement] -> m ()
249 addListNgrams listId ngramsType nes = do
250 var <- view repoVar
251 liftBase $ modifyMVar_ var $
252 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
253 saveNodeStory
254 where
255 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
256 -}
257
258 -- | TODO: incr the Version number
259 -- && should use patch
260 -- UNSAFE
261
262 setListNgrams :: HasNodeStory env err m
263 => NodeId
264 -> TableNgrams.NgramsType
265 -> Map NgramsTerm NgramsRepoElement
266 -> m ()
267 setListNgrams listId ngramsType ns = do
268 -- printDebug "[setListNgrams]" (listId, ngramsType)
269 getter <- view hasNodeStory
270 var <- liftBase $ (getter ^. nse_getter) [listId]
271 liftBase $ modifyMVar_ var $
272 pure . ( unNodeStory
273 . at listId . _Just
274 . a_state
275 . at ngramsType
276 .~ Just ns
277 )
278 saveNodeStory
279
280
281 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
282 newNgramsFromNgramsStatePatch p =
283 [ text2ngrams (unNgramsTerm n)
284 | (n,np) <- p ^.. _PatchMap
285 -- . each . _PatchMap
286 . each . _NgramsTablePatch
287 . _PatchMap . ifolded . withIndex
288 , _ <- np ^.. patch_new . _Just
289 ]
290
291
292
293
294 commitStatePatch :: ( HasNodeStory env err m
295 , HasNodeStoryImmediateSaver env
296 , HasNodeArchiveStoryImmediateSaver env
297 , CmdCommon env )
298 => ListId
299 -> Versioned NgramsStatePatch'
300 -> m (Versioned NgramsStatePatch')
301 commitStatePatch listId (Versioned _p_version p) = do
302 -- printDebug "[commitStatePatch]" listId
303 var <- getNodeStoryVar [listId]
304 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
305 vq' <- liftBase $ modifyMVar var $ \ns -> do
306 let
307 a = ns ^. unNodeStory . at listId . _Just
308 -- apply patches from version p_version to a ^. a_version
309 -- TODO Check this
310 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
311 q = mconcat $ a ^. a_history
312
313 --printDebug "[commitStatePatch] transformWith" (p,q)
314 -- let tws s = case s of
315 -- (Mod p) -> "Mod"
316 -- _ -> "Rpl"
317 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
318
319 let
320 (p', q') = transformWith ngramsStatePatchConflictResolution p q
321 a' = a & a_version +~ 1
322 & a_state %~ act p'
323 & a_history %~ (p' :)
324
325 {-
326 -- Ideally we would like to check these properties. However:
327 -- * They should be checked only to debug the code. The client data
328 -- should be able to trigger these.
329 -- * What kind of error should they throw (we are in IO here)?
330 -- * Should we keep modifyMVar?
331 -- * Should we throw the validation in an Exception, catch it around
332 -- modifyMVar and throw it back as an Error?
333 assertValid $ transformable p q
334 assertValid $ applicable p' (r ^. r_state)
335 -}
336 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
337 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
338 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
339 , Versioned (a' ^. a_version) q'
340 )
341
342 -- NOTE Now is the only good time to save the archive history. We
343 -- have the handle to the MVar and we need to save its exact
344 -- snapshot. Node Story archive is a linear table, so it's only
345 -- couple of inserts, it shouldn't take long...
346
347 -- If we postponed saving the archive to the debounce action, we
348 -- would have issues like
349 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
350 -- where the `q` computation from above (which uses the archive)
351 -- would cause incorrect patch application (before the previous
352 -- archive was saved and applied)
353 newNs' <- archiveSaver $ fst newNs
354
355 pure (newNs', snd newNs)
356
357 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
358 saveNodeStory
359 --saveNodeStoryImmediate
360 -- Save new ngrams
361 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
362
363 pure vq'
364
365
366
367 -- This is a special case of tableNgramsPut where the input patch is empty.
368 tableNgramsPull :: HasNodeStory env err m
369 => ListId
370 -> TableNgrams.NgramsType
371 -> Version
372 -> m (Versioned NgramsTablePatch)
373 tableNgramsPull listId ngramsType p_version = do
374 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
375 var <- getNodeStoryVar [listId]
376 r <- liftBase $ readMVar var
377
378 let
379 a = r ^. unNodeStory . at listId . _Just
380 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
381 q_table = q ^. _PatchMap . at ngramsType . _Just
382
383 pure (Versioned (a ^. a_version) q_table)
384
385
386
387
388 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
389 -- Apply the given patch to the DB and returns the patch to be applied on the
390 -- client.
391 -- TODO-ACCESS check
392 tableNgramsPut :: ( HasNodeStory env err m
393 , HasNodeStoryImmediateSaver env
394 , HasNodeArchiveStoryImmediateSaver env
395 , HasInvalidError err
396 , CmdCommon env
397 )
398 => TabType
399 -> ListId
400 -> Versioned NgramsTablePatch
401 -> m (Versioned NgramsTablePatch)
402 tableNgramsPut tabType listId (Versioned p_version p_table)
403 | p_table == mempty = do
404 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
405 let ngramsType = ngramsTypeFromTabType tabType
406 tableNgramsPull listId ngramsType p_version
407
408 | otherwise = do
409 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
410 let ngramsType = ngramsTypeFromTabType tabType
411 (p, p_validity) = PM.singleton ngramsType p_table
412
413 assertValid p_validity
414
415 ret <- commitStatePatch listId (Versioned p_version p)
416 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
417
418 pure ret
419
420
421
422 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
423 , FlowCmdM env err m
424 , HasNodeError err
425 , HasSettings env
426 , MonadJobStatus m
427 )
428 => UpdateTableNgramsCharts
429 -> JobHandle m
430 -> m ()
431 tableNgramsPostChartsAsync utn jobHandle = do
432 let tabType = utn ^. utn_tab_type
433 let listId = utn ^. utn_list_id
434
435 node <- getNode listId
436 let _nId = node ^. node_id
437 _uId = node ^. node_user_id
438 mCId = node ^. node_parent_id
439
440 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
441 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
442
443 case mCId of
444 Nothing -> do
445 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
446 markStarted 1 jobHandle
447 markFailed Nothing jobHandle
448 Just cId -> do
449 case tabType of
450 Authors -> do
451 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
452 markStarted 1 jobHandle
453 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
454 markComplete jobHandle
455 Institutes -> do
456 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
457 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
458 markStarted 3 jobHandle
459 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
460 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
461 markProgress 1 jobHandle
462 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
463 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
464 markProgress 1 jobHandle
465 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
466 markComplete jobHandle
467 Sources -> do
468 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
469 markStarted 1 jobHandle
470 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
471 markComplete jobHandle
472 Terms -> do
473 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
474 markStarted 6 jobHandle
475 {-
476 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
477 logRefSuccess
478 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
479 logRefSuccess
480 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
481 logRefSuccess
482 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
483 logRefSuccess
484 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
485 logRefSuccess
486 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
487 -}
488 markComplete jobHandle
489 _ -> do
490 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
491 markStarted 1 jobHandle
492 markFailed Nothing jobHandle
493
494 {-
495 { _ne_list :: ListType
496 If we merge the parents/children we can potentially create cycles!
497 , _ne_parent :: Maybe NgramsTerm
498 , _ne_children :: MSet NgramsTerm
499 }
500 -}
501
502 getNgramsTableMap :: HasNodeStory env err m
503 => NodeId
504 -> TableNgrams.NgramsType
505 -> m (Versioned NgramsTableMap)
506 getNgramsTableMap nodeId ngramsType = do
507 v <- getNodeStoryVar [nodeId]
508 repo <- liftBase $ readMVar v
509 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
510 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
511
512
513 dumpJsonTableMap :: HasNodeStory env err m
514 => Text
515 -> NodeId
516 -> TableNgrams.NgramsType
517 -> m ()
518 dumpJsonTableMap fpath nodeId ngramsType = do
519 m <- getNgramsTableMap nodeId ngramsType
520 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
521 pure ()
522
523
524 type MinSize = Int
525 type MaxSize = Int
526
527 -- | TODO Errors management
528 -- TODO: polymorphic for Annuaire or Corpus or ...
529 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
530 -- TODO: should take only one ListId
531
532
533 getTableNgrams :: forall env err m.
534 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
535 => NodeType -> NodeId -> TabType
536 -> ListId -> Limit -> Maybe Offset
537 -> Maybe ListType
538 -> Maybe MinSize -> Maybe MaxSize
539 -> Maybe OrderBy
540 -> (NgramsTerm -> Bool)
541 -> m (VersionedWithCount NgramsTable)
542 getTableNgrams _nType nId tabType listId limit_ offset
543 listType minSize maxSize orderBy searchQuery = do
544
545 t0 <- getTime
546 -- lIds <- selectNodesWithUsername NodeList userMaster
547 let
548 ngramsType = ngramsTypeFromTabType tabType
549 offset' = getOffset $ maybe 0 identity offset
550 listType' = maybe (const True) (==) listType
551 minSize' = maybe (const True) (<=) minSize
552 maxSize' = maybe (const True) (>=) maxSize
553
554 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
555 (tableMap ^. at r)
556 )
557 (ne ^. ne_root)
558
559 selected_node n = minSize' s
560 && maxSize' s
561 && searchQuery (n ^. ne_ngrams)
562 && listType' (n ^. ne_list)
563 where
564 s = n ^. ne_size
565
566 ---------------------------------------
567 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
568 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
569 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
570 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
571 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
572
573 ---------------------------------------
574 -- | Filter the given `tableMap` with the search criteria.
575 filteredNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
576 filteredNodes tableMap = roots
577 where
578 list = Set.fromList $ Map.elems tableMap
579 selected_nodes = list & Set.filter selected_node
580 roots = Set.map (rootOf tableMap) selected_nodes
581
582 -- | For each input root, extends its occurrence count with
583 -- the information found in the subitems.
584 withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
585 withInners tableMap roots = Set.map addSubitemsOccurrences roots
586 where
587 addSubitemsOccurrences :: NgramsElement -> NgramsElement
588 addSubitemsOccurrences e =
589 e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
590
591 alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
592 alterOccurrences occs t = case Map.lookup t tableMap of
593 Nothing -> occs
594 Just e' -> occs <> e' ^. ne_occurrences
595
596 -- | Paginate the results
597 sortAndPaginate :: Set NgramsElement -> [NgramsElement]
598 sortAndPaginate = take (getLimit limit_)
599 . drop offset'
600 . sortOnOrder orderBy
601 . Set.toList
602
603 ---------------------------------------
604
605 let scoresNeeded = needsScores orderBy
606 t1 <- getTime
607
608 versionedTableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
609
610 let tableMap = versionedTableMap ^. v_data
611 let filteredData = filteredNodes tableMap
612
613 let fltrCount = Set.size filteredData
614
615 t2 <- getTime
616 let tableMapSorted = versionedTableMap
617 & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
618 t3 <- getTime
619 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
620 liftBase $ do
621 hprint stderr
622 ("getTableNgrams total=" % hasTime
623 % " map1=" % hasTime
624 % " map2=" % hasTime
625 % " map3=" % hasTime
626 % " sql=" % (if scoresNeeded then "map2" else "map3")
627 % "\n"
628 ) t0 t3 t0 t1 t1 t2 t2 t3
629
630 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
631 pure $ toVersionedWithCount fltrCount tableMapSorted
632
633
634 -- | Helper function to get the ngrams table with scores.
635 getNgramsTable' :: forall env err m.
636 ( HasNodeStory env err m
637 , HasNodeError err
638 , CmdCommon env)
639 => NodeId
640 -> ListId
641 -> TableNgrams.NgramsType
642 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
643 getNgramsTable' nId listId ngramsType = do
644 tableMap <- getNgramsTableMap listId ngramsType
645 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
646 . Map.mapWithKey ngramsElementFromRepo
647
648 -- | Helper function to set scores on an `NgramsTable`.
649 setNgramsTableScores :: forall env err m t.
650 ( Each t t NgramsElement NgramsElement
651 , HasNodeStory env err m
652 , HasNodeError err
653 , CmdCommon env )
654 => NodeId
655 -> ListId
656 -> TableNgrams.NgramsType
657 -> t
658 -> m t
659 setNgramsTableScores nId listId ngramsType table = do
660 t1 <- getTime
661 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
662 --printDebug "[setNgramsTableScores] occurrences" occurrences
663 t2 <- getTime
664 liftBase $ do
665 let ngrams_terms = table ^.. each . ne_ngrams
666 -- printDebug "ngrams_terms" ngrams_terms
667 hprint stderr
668 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
669 (length ngrams_terms) t1 t2
670 let
671 setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences)
672
673 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
674
675 pure $ table & each %~ setOcc
676
677
678
679
680 scoresRecomputeTableNgrams :: forall env err m.
681 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
682 => NodeId -> TabType -> ListId -> m Int
683 scoresRecomputeTableNgrams nId tabType listId = do
684 tableMap <- getNgramsTableMap listId ngramsType
685 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
686 . Map.mapWithKey ngramsElementFromRepo
687
688 pure $ 1
689 where
690 ngramsType = ngramsTypeFromTabType tabType
691
692
693 -- APIs
694
695 -- TODO: find a better place for the code above, All APIs stay here
696
697 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
698 deriving (Generic, Enum, Bounded, Read, Show)
699
700 instance FromHttpApiData OrderBy
701 where
702 parseUrlPiece "TermAsc" = pure TermAsc
703 parseUrlPiece "TermDesc" = pure TermDesc
704 parseUrlPiece "ScoreAsc" = pure ScoreAsc
705 parseUrlPiece "ScoreDesc" = pure ScoreDesc
706 parseUrlPiece _ = Left "Unexpected value of OrderBy"
707
708 instance ToHttpApiData OrderBy where
709 toUrlPiece = pack . show
710
711 instance ToParamSchema OrderBy
712 instance FromJSON OrderBy
713 instance ToJSON OrderBy
714 instance ToSchema OrderBy
715 instance Arbitrary OrderBy
716 where
717 arbitrary = elements [minBound..maxBound]
718
719 needsScores :: Maybe OrderBy -> Bool
720 needsScores (Just ScoreAsc) = True
721 needsScores (Just ScoreDesc) = True
722 needsScores _ = False
723
724 type TableNgramsApiGet = Summary " Table Ngrams API Get"
725 :> QueryParamR "ngramsType" TabType
726 :> QueryParamR "list" ListId
727 :> QueryParamR "limit" Limit
728 :> QueryParam "offset" Offset
729 :> QueryParam "listType" ListType
730 :> QueryParam "minTermSize" MinSize
731 :> QueryParam "maxTermSize" MaxSize
732 :> QueryParam "orderBy" OrderBy
733 :> QueryParam "search" Text
734 :> Get '[JSON] (VersionedWithCount NgramsTable)
735
736 type TableNgramsApiPut = Summary " Table Ngrams API Change"
737 :> QueryParamR "ngramsType" TabType
738 :> QueryParamR "list" ListId
739 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
740 :> Put '[JSON] (Versioned NgramsTablePatch)
741
742 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
743 :> QueryParamR "ngramsType" TabType
744 :> QueryParamR "list" ListId
745 :> "recompute" :> Post '[JSON] Int
746
747 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
748 :> QueryParamR "ngramsType" TabType
749 :> QueryParamR "list" ListId
750 :> Get '[JSON] Version
751
752 type TableNgramsApi = TableNgramsApiGet
753 :<|> TableNgramsApiPut
754 :<|> RecomputeScoresNgramsApiGet
755 :<|> "version" :> TableNgramsApiGetVersion
756 :<|> TableNgramsAsyncApi
757
758 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
759 :> "async"
760 :> "charts"
761 :> "update"
762 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
763
764 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
765 => NodeId
766 -> TabType
767 -> ListId
768 -> Limit
769 -> Maybe Offset
770 -> Maybe ListType
771 -> Maybe MinSize -> Maybe MaxSize
772 -> Maybe OrderBy
773 -> Maybe Text -- full text search
774 -> m (VersionedWithCount NgramsTable)
775 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
776 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
777 where
778 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
779
780
781
782 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
783 => NodeId
784 -> TabType
785 -> ListId
786 -> m Version
787 getTableNgramsVersion _nId _tabType listId = currentVersion listId
788
789
790
791 -- TODO: limit?
792 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
793 -- This line above looks like a waste of computation to finally get only the version.
794 -- See the comment about listNgramsChangedSince.
795
796
797 -- | Text search is deactivated for now for ngrams by doc only
798 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
799 => DocId -> TabType
800 -> ListId -> Limit -> Maybe Offset
801 -> Maybe ListType
802 -> Maybe MinSize -> Maybe MaxSize
803 -> Maybe OrderBy
804 -> Maybe Text -- full text search
805 -> m (VersionedWithCount NgramsTable)
806 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
807 ns <- selectNodesWithUsername NodeList userMaster
808 let ngramsType = ngramsTypeFromTabType tabType
809 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
810 let searchQuery (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt
811 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
812
813
814
815 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
816 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
817 :<|> tableNgramsPut
818 :<|> scoresRecomputeTableNgrams cId
819 :<|> getTableNgramsVersion cId
820 :<|> apiNgramsAsync cId
821
822 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
823 apiNgramsTableDoc dId = getTableNgramsDoc dId
824 :<|> tableNgramsPut
825 :<|> scoresRecomputeTableNgrams dId
826 :<|> getTableNgramsVersion dId
827 :<|> apiNgramsAsync dId
828
829 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
830 apiNgramsAsync _dId =
831 serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
832 \jHandle' -> tableNgramsPostChartsAsync i jHandle'
833
834 -- Did the given list of ngrams changed since the given version?
835 -- The returned value is versioned boolean value, meaning that one always retrieve the
836 -- latest version.
837 -- If the given version is negative then one simply receive the latest version and True.
838 -- Using this function is more precise than simply comparing the latest version number
839 -- with the local version number. Indeed there might be no change to this particular list
840 -- and still the version number has changed because of other lists.
841 --
842 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
843 -- * currentVersion: good computation, good bandwidth, bad precision.
844 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
845 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
846 listNgramsChangedSince :: HasNodeStory env err m
847 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
848 listNgramsChangedSince listId ngramsType version
849 | version < 0 =
850 Versioned <$> currentVersion listId <*> pure True
851 | otherwise =
852 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)