]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[OPTIM] Ngrams Table query
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , Version
73 , Versioned(..)
74 , VersionedWithCount(..)
75 , currentVersion
76 , listNgramsChangedSince
77 , MinSize, MaxSize, OrderBy, NgramsTable
78 , UpdateTableNgramsCharts
79 )
80 where
81
82 import Control.Concurrent
83 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
84 import Control.Monad.Reader
85 import Data.Aeson hiding ((.=))
86 import Data.Either (Either(..))
87 import Data.Foldable
88 import Data.Map.Strict (Map)
89 import Data.Maybe (fromMaybe)
90 import Data.Monoid
91 import Data.Ord (Down(..))
92 import Data.Patch.Class (Action(act), Transformable(..), ours)
93 import Data.Swagger hiding (version, patch)
94 import Data.Text (Text, isInfixOf, unpack, pack)
95 import Data.Text.Lazy.IO as DTL
96 import Formatting (hprint, int, (%))
97 import GHC.Generics (Generic)
98 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
99 import Gargantext.API.Admin.Types (HasSettings)
100 import Gargantext.API.Job
101 import Gargantext.API.Ngrams.Types
102 import Gargantext.API.Prelude
103 import Gargantext.Core.NodeStory
104 import Gargantext.Core.Mail.Types (HasMail)
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133
134 {-
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
137
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
140
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 ]
148
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
151 -}
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
155
156 {-
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
166 ]
167
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> NgramsType
170 -> NgramsTablePatch
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
176 ]
177 -}
178
179 ------------------------------------------------------------------------
180
181 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 => m ()
183 saveNodeStory = do
184 saver <- view hasNodeStorySaver
185 liftBase $ do
186 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
187 saver
188 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
189
190
191 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
192 => m ()
193 saveNodeStoryImmediate = do
194 saver <- view hasNodeStoryImmediateSaver
195 liftBase $ do
196 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
197 saver
198 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
199
200
201 listTypeConflictResolution :: ListType -> ListType -> ListType
202 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
203
204
205 ngramsStatePatchConflictResolution
206 :: TableNgrams.NgramsType
207 -> NgramsTerm
208 -> ConflictResolutionNgramsPatch
209 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
210 -- = (ours, (const ours, ours), (False, False))
211 -- (False, False) mean here that Mod has always priority.
212 = (ours, (const ours, ours), (True, False))
213 -- (True, False) <- would mean priority to the left (same as ours).
214 -- undefined {- TODO think this through -}, listTypeConflictResolution)
215
216
217
218
219 -- Current state:
220 -- Insertions are not considered as patches,
221 -- they do not extend history,
222 -- they do not bump version.
223 insertNewOnly :: a -> Maybe b -> a
224 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
225 -- TODO error handling
226
227 {- unused
228 -- TODO refactor with putListNgrams
229 copyListNgrams :: RepoCmdM env err m
230 => NodeId -> NodeId -> NgramsType
231 -> m ()
232 copyListNgrams srcListId dstListId ngramsType = do
233 var <- view repoVar
234 liftBase $ modifyMVar_ var $
235 pure . (r_state . at ngramsType %~ (Just . f . something))
236 saveNodeStory
237 where
238 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
239 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
240
241 -- TODO refactor with putListNgrams
242 -- The list must be non-empty!
243 -- The added ngrams must be non-existent!
244 addListNgrams :: RepoCmdM env err m
245 => NodeId -> NgramsType
246 -> [NgramsElement] -> m ()
247 addListNgrams listId ngramsType nes = do
248 var <- view repoVar
249 liftBase $ modifyMVar_ var $
250 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
251 saveNodeStory
252 where
253 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
254 -}
255
256 -- | TODO: incr the Version number
257 -- && should use patch
258 -- UNSAFE
259
260 setListNgrams :: HasNodeStory env err m
261 => NodeId
262 -> TableNgrams.NgramsType
263 -> Map NgramsTerm NgramsRepoElement
264 -> m ()
265 setListNgrams listId ngramsType ns = do
266 -- printDebug "[setListNgrams]" (listId, ngramsType)
267 getter <- view hasNodeStory
268 var <- liftBase $ (getter ^. nse_getter) [listId]
269 liftBase $ modifyMVar_ var $
270 pure . ( unNodeStory
271 . at listId . _Just
272 . a_state
273 . at ngramsType
274 .~ Just ns
275 )
276 saveNodeStory
277
278
279 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
280 newNgramsFromNgramsStatePatch p =
281 [ text2ngrams (unNgramsTerm n)
282 | (n,np) <- p ^.. _PatchMap
283 -- . each . _PatchMap
284 . each . _NgramsTablePatch
285 . _PatchMap . ifolded . withIndex
286 , _ <- np ^.. patch_new . _Just
287 ]
288
289
290
291
292 commitStatePatch :: (HasNodeStory env err m, HasMail env)
293 => ListId
294 -> Versioned NgramsStatePatch'
295 -> m (Versioned NgramsStatePatch')
296 commitStatePatch listId (Versioned _p_version p) = do
297 -- printDebug "[commitStatePatch]" listId
298 var <- getNodeStoryVar [listId]
299 vq' <- liftBase $ modifyMVar var $ \ns -> do
300 let
301 a = ns ^. unNodeStory . at listId . _Just
302 -- apply patches from version p_version to a ^. a_version
303 -- TODO Check this
304 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
305 q = mconcat $ a ^. a_history
306
307 printDebug "transformWith" (p,q)
308
309 let
310 (p', q') = transformWith ngramsStatePatchConflictResolution p q
311 a' = a & a_version +~ 1
312 & a_state %~ act p'
313 & a_history %~ (p' :)
314
315 {-
316 -- Ideally we would like to check these properties. However:
317 -- * They should be checked only to debug the code. The client data
318 -- should be able to trigger these.
319 -- * What kind of error should they throw (we are in IO here)?
320 -- * Should we keep modifyMVar?
321 -- * Should we throw the validation in an Exception, catch it around
322 -- modifyMVar and throw it back as an Error?
323 assertValid $ transformable p q
324 assertValid $ applicable p' (r ^. r_state)
325 -}
326 printDebug "[commitStatePatch] a version" (a ^. a_version)
327 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
328 pure ( ns & unNodeStory . at listId .~ (Just a')
329 , Versioned (a' ^. a_version) q'
330 )
331 saveNodeStory
332 -- Save new ngrams
333 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
334
335 pure vq'
336
337
338
339 -- This is a special case of tableNgramsPut where the input patch is empty.
340 tableNgramsPull :: HasNodeStory env err m
341 => ListId
342 -> TableNgrams.NgramsType
343 -> Version
344 -> m (Versioned NgramsTablePatch)
345 tableNgramsPull listId ngramsType p_version = do
346 printDebug "[tableNgramsPull]" (listId, ngramsType)
347 var <- getNodeStoryVar [listId]
348 r <- liftBase $ readMVar var
349
350 let
351 a = r ^. unNodeStory . at listId . _Just
352 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
353 q_table = q ^. _PatchMap . at ngramsType . _Just
354
355 pure (Versioned (a ^. a_version) q_table)
356
357
358
359
360 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
361 -- Apply the given patch to the DB and returns the patch to be applied on the
362 -- client.
363 -- TODO-ACCESS check
364 tableNgramsPut :: ( HasNodeStory env err m
365 , HasInvalidError err
366 , HasSettings env
367 , HasMail env
368 )
369 => TabType
370 -> ListId
371 -> Versioned NgramsTablePatch
372 -> m (Versioned NgramsTablePatch)
373 tableNgramsPut tabType listId (Versioned p_version p_table)
374 | p_table == mempty = do
375 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
376 let ngramsType = ngramsTypeFromTabType tabType
377 tableNgramsPull listId ngramsType p_version
378
379 | otherwise = do
380 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
381 let ngramsType = ngramsTypeFromTabType tabType
382 (p, p_validity) = PM.singleton ngramsType p_table
383
384 assertValid p_validity
385
386 ret <- commitStatePatch listId (Versioned p_version p)
387 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
388
389 pure ret
390
391
392
393 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
394 , FlowCmdM env err m
395 , HasNodeError err
396 , HasSettings env
397 )
398 => UpdateTableNgramsCharts
399 -> (JobLog -> m ())
400 -> m JobLog
401 tableNgramsPostChartsAsync utn logStatus = do
402 let tabType = utn ^. utn_tab_type
403 let listId = utn ^. utn_list_id
404
405 node <- getNode listId
406 let nId = node ^. node_id
407 _uId = node ^. node_user_id
408 mCId = node ^. node_parent_id
409
410 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
411 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
412
413 case mCId of
414 Nothing -> do
415 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
416 pure $ jobLogFail $ jobLogInit 1
417 Just cId -> do
418 case tabType of
419 Authors -> do
420 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
421 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
422 logRef
423 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
424 logRefSuccess
425
426 getRef
427 Institutes -> do
428 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
429 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
430 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
431 logRef
432 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
433 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
434 logRefSuccess
435 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
436 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
437 logRefSuccess
438 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
439 logRefSuccess
440
441 getRef
442 Sources -> do
443 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
444 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
445 logRef
446 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
447 logRefSuccess
448
449 getRef
450 Terms -> do
451 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
452 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
453 logRef
454 {-
455 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
456 logRefSuccess
457 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
458 logRefSuccess
459 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
460 logRefSuccess
461 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
462 logRefSuccess
463 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
464 logRefSuccess
465 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
466 -}
467 logRefSuccess
468
469 getRef
470 _ -> do
471 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
472 pure $ jobLogFail $ jobLogInit 1
473
474 {-
475 { _ne_list :: ListType
476 If we merge the parents/children we can potentially create cycles!
477 , _ne_parent :: Maybe NgramsTerm
478 , _ne_children :: MSet NgramsTerm
479 }
480 -}
481
482 getNgramsTableMap :: HasNodeStory env err m
483 => NodeId
484 -> TableNgrams.NgramsType
485 -> m (Versioned NgramsTableMap)
486 getNgramsTableMap nodeId ngramsType = do
487 v <- getNodeStoryVar [nodeId]
488 repo <- liftBase $ readMVar v
489 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
490 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
491
492
493 dumpJsonTableMap :: HasNodeStory env err m
494 => Text
495 -> NodeId
496 -> TableNgrams.NgramsType
497 -> m ()
498 dumpJsonTableMap fpath nodeId ngramsType = do
499 m <- getNgramsTableMap nodeId ngramsType
500 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
501 pure ()
502
503
504 type MinSize = Int
505 type MaxSize = Int
506
507 -- | TODO Errors management
508 -- TODO: polymorphic for Annuaire or Corpus or ...
509 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
510 -- TODO: should take only one ListId
511
512
513 getTableNgrams :: forall env err m.
514 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
515 => NodeType -> NodeId -> TabType
516 -> ListId -> Limit -> Maybe Offset
517 -> Maybe ListType
518 -> Maybe MinSize -> Maybe MaxSize
519 -> Maybe OrderBy
520 -> (NgramsTerm -> Bool)
521 -> m (VersionedWithCount NgramsTable)
522 getTableNgrams _nType nId tabType listId limit_ offset
523 listType minSize maxSize orderBy searchQuery = do
524
525 t0 <- getTime
526 -- lIds <- selectNodesWithUsername NodeList userMaster
527 let
528 ngramsType = ngramsTypeFromTabType tabType
529 offset' = maybe 0 identity offset
530 listType' = maybe (const True) (==) listType
531 minSize' = maybe (const True) (<=) minSize
532 maxSize' = maybe (const True) (>=) maxSize
533
534 selected_node n = minSize' s
535 && maxSize' s
536 && searchQuery (n ^. ne_ngrams)
537 && listType' (n ^. ne_list)
538 where
539 s = n ^. ne_size
540
541 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
542
543 ---------------------------------------
544 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
545 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
546 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
547 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
548 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
549
550 ---------------------------------------
551 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
552 filteredNodes tableMap = rootOf <$> list & filter selected_node
553 where
554 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
555 (tableMap ^. at r)
556 )
557 (ne ^. ne_root)
558 list = tableMap ^.. each
559
560 ---------------------------------------
561 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
562 selectAndPaginate tableMap = roots <> inners
563 where
564 list = tableMap ^.. each
565 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
566 (tableMap ^. at r)
567 )
568 (ne ^. ne_root)
569 selected_nodes = list & take limit_
570 . drop offset'
571 . filter selected_node
572 . sortOnOrder orderBy
573 roots = rootOf <$> selected_nodes
574 rootsSet = Set.fromList (_ne_ngrams <$> roots)
575 inners = list & filter (selected_inner rootsSet)
576
577 ---------------------------------------
578 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
579 setScores False table = pure table
580 setScores True table = do
581 let ngrams_terms = table ^.. each . ne_ngrams
582 -- printDebug "ngrams_terms" ngrams_terms
583 t1 <- getTime
584 occurrences <- getOccByNgramsOnlyFast nId
585 listId
586 ngramsType
587 --printDebug "occurrences" occurrences
588 t2 <- getTime
589 liftBase $ hprint stderr
590 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
591 (length ngrams_terms) t1 t2
592 let
593 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
594
595 pure $ table & each %~ setOcc
596 ---------------------------------------
597
598 -- lists <- catMaybes <$> listsWith userMaster
599 -- trace (show lists) $
600 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
601
602
603 let scoresNeeded = needsScores orderBy
604 tableMap1 <- getNgramsTableMap listId ngramsType
605 t1 <- getTime
606
607 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
608 . Map.mapWithKey ngramsElementFromRepo
609
610 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
611 . filteredNodes
612
613 let fltrCount = length $ fltr ^. v_data . _NgramsTable
614
615 t2 <- getTime
616 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
617 . setScores (not scoresNeeded)
618 . selectAndPaginate
619 t3 <- getTime
620 liftBase $ hprint stderr
621 ("getTableNgrams total=" % hasTime
622 % " map1=" % hasTime
623 % " map2=" % hasTime
624 % " map3=" % hasTime
625 % " sql=" % (if scoresNeeded then "map2" else "map3")
626 % "\n"
627 ) t0 t3 t0 t1 t1 t2 t2 t3
628 pure $ toVersionedWithCount fltrCount tableMap3
629
630
631
632 scoresRecomputeTableNgrams :: forall env err m.
633 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
634 => NodeId -> TabType -> ListId -> m Int
635 scoresRecomputeTableNgrams nId tabType listId = do
636 tableMap <- getNgramsTableMap listId ngramsType
637 _ <- tableMap & v_data %%~ setScores
638 . Map.mapWithKey ngramsElementFromRepo
639
640 pure $ 1
641 where
642 ngramsType = ngramsTypeFromTabType tabType
643
644 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
645 setScores table = do
646 occurrences <- getOccByNgramsOnlyFast nId
647 listId
648 ngramsType
649 let
650 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
651
652 pure $ table & each %~ setOcc
653
654
655 -- APIs
656
657 -- TODO: find a better place for the code above, All APIs stay here
658
659 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
660 deriving (Generic, Enum, Bounded, Read, Show)
661
662 instance FromHttpApiData OrderBy
663 where
664 parseUrlPiece "TermAsc" = pure TermAsc
665 parseUrlPiece "TermDesc" = pure TermDesc
666 parseUrlPiece "ScoreAsc" = pure ScoreAsc
667 parseUrlPiece "ScoreDesc" = pure ScoreDesc
668 parseUrlPiece _ = Left "Unexpected value of OrderBy"
669
670 instance ToHttpApiData OrderBy where
671 toUrlPiece = pack . show
672
673 instance ToParamSchema OrderBy
674 instance FromJSON OrderBy
675 instance ToJSON OrderBy
676 instance ToSchema OrderBy
677 instance Arbitrary OrderBy
678 where
679 arbitrary = elements [minBound..maxBound]
680
681 needsScores :: Maybe OrderBy -> Bool
682 needsScores (Just ScoreAsc) = True
683 needsScores (Just ScoreDesc) = True
684 needsScores _ = False
685
686 type TableNgramsApiGet = Summary " Table Ngrams API Get"
687 :> QueryParamR "ngramsType" TabType
688 :> QueryParamR "list" ListId
689 :> QueryParamR "limit" Limit
690 :> QueryParam "offset" Offset
691 :> QueryParam "listType" ListType
692 :> QueryParam "minTermSize" MinSize
693 :> QueryParam "maxTermSize" MaxSize
694 :> QueryParam "orderBy" OrderBy
695 :> QueryParam "search" Text
696 :> Get '[JSON] (VersionedWithCount NgramsTable)
697
698 type TableNgramsApiPut = Summary " Table Ngrams API Change"
699 :> QueryParamR "ngramsType" TabType
700 :> QueryParamR "list" ListId
701 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
702 :> Put '[JSON] (Versioned NgramsTablePatch)
703
704 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
705 :> QueryParamR "ngramsType" TabType
706 :> QueryParamR "list" ListId
707 :> "recompute" :> Post '[JSON] Int
708
709 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
710 :> QueryParamR "ngramsType" TabType
711 :> QueryParamR "list" ListId
712 :> Get '[JSON] Version
713
714 type TableNgramsApi = TableNgramsApiGet
715 :<|> TableNgramsApiPut
716 :<|> RecomputeScoresNgramsApiGet
717 :<|> "version" :> TableNgramsApiGetVersion
718 :<|> TableNgramsAsyncApi
719
720 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
721 :> "async"
722 :> "charts"
723 :> "update"
724 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
725
726 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
727 => NodeId
728 -> TabType
729 -> ListId
730 -> Limit
731 -> Maybe Offset
732 -> Maybe ListType
733 -> Maybe MinSize -> Maybe MaxSize
734 -> Maybe OrderBy
735 -> Maybe Text -- full text search
736 -> m (VersionedWithCount NgramsTable)
737 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
738 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
739 where
740 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
741
742
743
744 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
745 => NodeId
746 -> TabType
747 -> ListId
748 -> m Version
749 getTableNgramsVersion _nId _tabType listId = currentVersion listId
750
751
752
753 -- TODO: limit?
754 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
755 -- This line above looks like a waste of computation to finally get only the version.
756 -- See the comment about listNgramsChangedSince.
757
758
759 -- | Text search is deactivated for now for ngrams by doc only
760 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
761 => DocId -> TabType
762 -> ListId -> Limit -> Maybe Offset
763 -> Maybe ListType
764 -> Maybe MinSize -> Maybe MaxSize
765 -> Maybe OrderBy
766 -> Maybe Text -- full text search
767 -> m (VersionedWithCount NgramsTable)
768 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
769 ns <- selectNodesWithUsername NodeList userMaster
770 let ngramsType = ngramsTypeFromTabType tabType
771 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
772 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
773 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
774
775
776
777 apiNgramsTableCorpus :: ( GargServerC env err m
778 )
779 => NodeId -> ServerT TableNgramsApi m
780 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
781 :<|> tableNgramsPut
782 :<|> scoresRecomputeTableNgrams cId
783 :<|> getTableNgramsVersion cId
784 :<|> apiNgramsAsync cId
785
786 apiNgramsTableDoc :: ( GargServerC env err m
787 )
788 => DocId -> ServerT TableNgramsApi m
789 apiNgramsTableDoc dId = getTableNgramsDoc dId
790 :<|> tableNgramsPut
791 :<|> scoresRecomputeTableNgrams dId
792 :<|> getTableNgramsVersion dId
793 :<|> apiNgramsAsync dId
794
795 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
796 apiNgramsAsync _dId =
797 serveJobsAPI $
798 JobFunction $ \i log ->
799 let
800 log' x = do
801 printDebug "tableNgramsPostChartsAsync" x
802 liftBase $ log x
803 in tableNgramsPostChartsAsync i log'
804
805 -- Did the given list of ngrams changed since the given version?
806 -- The returned value is versioned boolean value, meaning that one always retrieve the
807 -- latest version.
808 -- If the given version is negative then one simply receive the latest version and True.
809 -- Using this function is more precise than simply comparing the latest version number
810 -- with the local version number. Indeed there might be no change to this particular list
811 -- and still the version number has changed because of other lists.
812 --
813 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
814 -- * currentVersion: good computation, good bandwidth, bad precision.
815 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
816 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
817 listNgramsChangedSince :: HasNodeStory env err m
818 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
819 listNgramsChangedSince listId ngramsType version
820 | version < 0 =
821 Versioned <$> currentVersion listId <*> pure True
822 | otherwise =
823 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)