]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev' into 141-dev-node-stories-db-optimization
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , Version
73 , Versioned(..)
74 , VersionedWithCount(..)
75 , currentVersion
76 , listNgramsChangedSince
77 , MinSize, MaxSize, OrderBy, NgramsTable
78 , UpdateTableNgramsCharts
79 )
80 where
81
82 import Control.Concurrent
83 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
84 import Control.Monad.Reader
85 import Data.Aeson hiding ((.=))
86 import Data.Either (Either(..))
87 import Data.Foldable
88 import Data.Map.Strict (Map)
89 import Data.Maybe (fromMaybe)
90 import Data.Monoid
91 import Data.Ord (Down(..))
92 import Data.Patch.Class (Action(act), Transformable(..), ours)
93 import Data.Swagger hiding (version, patch)
94 import Data.Text (Text, isInfixOf, unpack, pack)
95 import Data.Text.Lazy.IO as DTL
96 import Formatting (hprint, int, (%))
97 import GHC.Generics (Generic)
98 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
99 import Gargantext.API.Admin.Types (HasSettings)
100 import Gargantext.API.Job
101 import Gargantext.API.Ngrams.Types
102 import Gargantext.API.Prelude
103 import Gargantext.Core.NodeStory
104 import Gargantext.Core.Mail.Types (HasMail)
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133
134 {-
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
137
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
140
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 ]
148
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
151 -}
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
155
156 {-
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
166 ]
167
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> NgramsType
170 -> NgramsTablePatch
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
176 ]
177 -}
178
179 ------------------------------------------------------------------------
180
181 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 => m ()
183 saveNodeStory = do
184 saver <- view hasNodeStorySaver
185 liftBase $ do
186 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
187 saver
188 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
189
190
191 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
192 => m ()
193 saveNodeStoryImmediate = do
194 saver <- view hasNodeStoryImmediateSaver
195 liftBase $ do
196 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
197 saver
198 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
199
200
201 listTypeConflictResolution :: ListType -> ListType -> ListType
202 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
203
204
205 ngramsStatePatchConflictResolution
206 :: TableNgrams.NgramsType
207 -> NgramsTerm
208 -> ConflictResolutionNgramsPatch
209 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
210 = (ours, (const ours, ours), (False, False))
211 -- (False, False) mean here that Mod has always priority.
212 -- (True, False) <- would mean priority to the left (same as ours).
213 -- undefined {- TODO think this through -}, listTypeConflictResolution)
214
215
216
217
218 -- Current state:
219 -- Insertions are not considered as patches,
220 -- they do not extend history,
221 -- they do not bump version.
222 insertNewOnly :: a -> Maybe b -> a
223 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
224 -- TODO error handling
225
226 {- unused
227 -- TODO refactor with putListNgrams
228 copyListNgrams :: RepoCmdM env err m
229 => NodeId -> NodeId -> NgramsType
230 -> m ()
231 copyListNgrams srcListId dstListId ngramsType = do
232 var <- view repoVar
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType %~ (Just . f . something))
235 saveNodeStory
236 where
237 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
238 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
239
240 -- TODO refactor with putListNgrams
241 -- The list must be non-empty!
242 -- The added ngrams must be non-existent!
243 addListNgrams :: RepoCmdM env err m
244 => NodeId -> NgramsType
245 -> [NgramsElement] -> m ()
246 addListNgrams listId ngramsType nes = do
247 var <- view repoVar
248 liftBase $ modifyMVar_ var $
249 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
250 saveNodeStory
251 where
252 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
253 -}
254
255 -- | TODO: incr the Version number
256 -- && should use patch
257 -- UNSAFE
258
259 setListNgrams :: HasNodeStory env err m
260 => NodeId
261 -> TableNgrams.NgramsType
262 -> Map NgramsTerm NgramsRepoElement
263 -> m ()
264 setListNgrams listId ngramsType ns = do
265 -- printDebug "[setListNgrams]" (listId, ngramsType)
266 getter <- view hasNodeStory
267 var <- liftBase $ (getter ^. nse_getter) [listId]
268 liftBase $ modifyMVar_ var $
269 pure . ( unNodeStory
270 . at listId . _Just
271 . a_state
272 . at ngramsType
273 .~ Just ns
274 )
275 saveNodeStory
276
277
278 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
279 newNgramsFromNgramsStatePatch p =
280 [ text2ngrams (unNgramsTerm n)
281 | (n,np) <- p ^.. _PatchMap
282 -- . each . _PatchMap
283 . each . _NgramsTablePatch
284 . _PatchMap . ifolded . withIndex
285 , _ <- np ^.. patch_new . _Just
286 ]
287
288
289
290
291 commitStatePatch :: (HasNodeStory env err m, HasMail env)
292 => ListId
293 -> Versioned NgramsStatePatch'
294 -> m (Versioned NgramsStatePatch')
295 commitStatePatch listId (Versioned _p_version p) = do
296 -- printDebug "[commitStatePatch]" listId
297 var <- getNodeStoryVar [listId]
298 vq' <- liftBase $ modifyMVar var $ \ns -> do
299 let
300 a = ns ^. unNodeStory . at listId . _Just
301 -- apply patches from version p_version to a ^. a_version
302 -- TODO Check this
303 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
304 q = mconcat $ a ^. a_history
305 (p', q') = transformWith ngramsStatePatchConflictResolution p q
306 a' = a & a_version +~ 1
307 & a_state %~ act p'
308 & a_history %~ (p' :)
309
310 {-
311 -- Ideally we would like to check these properties. However:
312 -- * They should be checked only to debug the code. The client data
313 -- should be able to trigger these.
314 -- * What kind of error should they throw (we are in IO here)?
315 -- * Should we keep modifyMVar?
316 -- * Should we throw the validation in an Exception, catch it around
317 -- modifyMVar and throw it back as an Error?
318 assertValid $ transformable p q
319 assertValid $ applicable p' (r ^. r_state)
320 -}
321 printDebug "[commitStatePatch] a version" (a ^. a_version)
322 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
323 pure ( ns & unNodeStory . at listId .~ (Just a')
324 , Versioned (a' ^. a_version) q'
325 )
326 saveNodeStory
327 -- Save new ngrams
328 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
329
330 pure vq'
331
332
333
334 -- This is a special case of tableNgramsPut where the input patch is empty.
335 tableNgramsPull :: HasNodeStory env err m
336 => ListId
337 -> TableNgrams.NgramsType
338 -> Version
339 -> m (Versioned NgramsTablePatch)
340 tableNgramsPull listId ngramsType p_version = do
341 printDebug "[tableNgramsPull]" (listId, ngramsType)
342 var <- getNodeStoryVar [listId]
343 r <- liftBase $ readMVar var
344
345 let
346 a = r ^. unNodeStory . at listId . _Just
347 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
348 q_table = q ^. _PatchMap . at ngramsType . _Just
349
350 pure (Versioned (a ^. a_version) q_table)
351
352
353
354
355 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
356 -- Apply the given patch to the DB and returns the patch to be applied on the
357 -- client.
358 -- TODO-ACCESS check
359 tableNgramsPut :: ( HasNodeStory env err m
360 , HasInvalidError err
361 , HasSettings env
362 , HasMail env
363 )
364 => TabType
365 -> ListId
366 -> Versioned NgramsTablePatch
367 -> m (Versioned NgramsTablePatch)
368 tableNgramsPut tabType listId (Versioned p_version p_table)
369 | p_table == mempty = do
370 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
371 let ngramsType = ngramsTypeFromTabType tabType
372 tableNgramsPull listId ngramsType p_version
373
374 | otherwise = do
375 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
376 let ngramsType = ngramsTypeFromTabType tabType
377 (p, p_validity) = PM.singleton ngramsType p_table
378
379 assertValid p_validity
380
381 ret <- commitStatePatch listId (Versioned p_version p)
382 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
383
384 pure ret
385
386
387
388 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
389 , FlowCmdM env err m
390 , HasNodeError err
391 , HasSettings env
392 )
393 => UpdateTableNgramsCharts
394 -> (JobLog -> m ())
395 -> m JobLog
396 tableNgramsPostChartsAsync utn logStatus = do
397 let tabType = utn ^. utn_tab_type
398 let listId = utn ^. utn_list_id
399
400 node <- getNode listId
401 let nId = node ^. node_id
402 _uId = node ^. node_user_id
403 mCId = node ^. node_parent_id
404
405 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
406 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
407
408 case mCId of
409 Nothing -> do
410 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
411 pure $ jobLogFail $ jobLogInit 1
412 Just cId -> do
413 case tabType of
414 Authors -> do
415 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
416 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
417 logRef
418 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
419 logRefSuccess
420
421 getRef
422 Institutes -> do
423 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
424 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
425 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
426 logRef
427 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
428 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
429 logRefSuccess
430 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
431 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
432 logRefSuccess
433 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
434 logRefSuccess
435
436 getRef
437 Sources -> do
438 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
439 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
440 logRef
441 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
442 logRefSuccess
443
444 getRef
445 Terms -> do
446 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
447 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
448 logRef
449 {-
450 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
451 logRefSuccess
452 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
453 logRefSuccess
454 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
455 logRefSuccess
456 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
457 logRefSuccess
458 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
459 logRefSuccess
460 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
461 -}
462 logRefSuccess
463
464 getRef
465 _ -> do
466 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
467 pure $ jobLogFail $ jobLogInit 1
468
469 {-
470 { _ne_list :: ListType
471 If we merge the parents/children we can potentially create cycles!
472 , _ne_parent :: Maybe NgramsTerm
473 , _ne_children :: MSet NgramsTerm
474 }
475 -}
476
477 getNgramsTableMap :: HasNodeStory env err m
478 => NodeId
479 -> TableNgrams.NgramsType
480 -> m (Versioned NgramsTableMap)
481 getNgramsTableMap nodeId ngramsType = do
482 v <- getNodeStoryVar [nodeId]
483 repo <- liftBase $ readMVar v
484 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
485 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
486
487
488 dumpJsonTableMap :: HasNodeStory env err m
489 => Text
490 -> NodeId
491 -> TableNgrams.NgramsType
492 -> m ()
493 dumpJsonTableMap fpath nodeId ngramsType = do
494 m <- getNgramsTableMap nodeId ngramsType
495 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
496 pure ()
497
498
499 type MinSize = Int
500 type MaxSize = Int
501
502 -- | TODO Errors management
503 -- TODO: polymorphic for Annuaire or Corpus or ...
504 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
505 -- TODO: should take only one ListId
506
507
508 getTableNgrams :: forall env err m.
509 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
510 => NodeType -> NodeId -> TabType
511 -> ListId -> Limit -> Maybe Offset
512 -> Maybe ListType
513 -> Maybe MinSize -> Maybe MaxSize
514 -> Maybe OrderBy
515 -> (NgramsTerm -> Bool)
516 -> m (VersionedWithCount NgramsTable)
517 getTableNgrams _nType nId tabType listId limit_ offset
518 listType minSize maxSize orderBy searchQuery = do
519
520 t0 <- getTime
521 -- lIds <- selectNodesWithUsername NodeList userMaster
522 let
523 ngramsType = ngramsTypeFromTabType tabType
524 offset' = maybe 0 identity offset
525 listType' = maybe (const True) (==) listType
526 minSize' = maybe (const True) (<=) minSize
527 maxSize' = maybe (const True) (>=) maxSize
528
529 selected_node n = minSize' s
530 && maxSize' s
531 && searchQuery (n ^. ne_ngrams)
532 && listType' (n ^. ne_list)
533 where
534 s = n ^. ne_size
535
536 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
537
538 ---------------------------------------
539 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
540 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
541 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
542 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
543 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
544
545 ---------------------------------------
546 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
547 filteredNodes tableMap = rootOf <$> list & filter selected_node
548 where
549 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
550 (tableMap ^. at r)
551 )
552 (ne ^. ne_root)
553 list = tableMap ^.. each
554
555 ---------------------------------------
556 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
557 selectAndPaginate tableMap = roots <> inners
558 where
559 list = tableMap ^.. each
560 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
561 (tableMap ^. at r)
562 )
563 (ne ^. ne_root)
564 selected_nodes = list & take limit_
565 . drop offset'
566 . filter selected_node
567 . sortOnOrder orderBy
568 roots = rootOf <$> selected_nodes
569 rootsSet = Set.fromList (_ne_ngrams <$> roots)
570 inners = list & filter (selected_inner rootsSet)
571
572 ---------------------------------------
573 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
574 setScores False table = pure table
575 setScores True table = do
576 let ngrams_terms = table ^.. each . ne_ngrams
577 -- printDebug "ngrams_terms" ngrams_terms
578 t1 <- getTime
579 occurrences <- getOccByNgramsOnlyFast' nId
580 listId
581 ngramsType
582 ngrams_terms
583 --printDebug "occurrences" occurrences
584 t2 <- getTime
585 liftBase $ hprint stderr
586 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
587 (length ngrams_terms) t1 t2
588 let
589 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
590
591 pure $ table & each %~ setOcc
592 ---------------------------------------
593
594 -- lists <- catMaybes <$> listsWith userMaster
595 -- trace (show lists) $
596 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
597
598
599 let scoresNeeded = needsScores orderBy
600 tableMap1 <- getNgramsTableMap listId ngramsType
601 t1 <- getTime
602
603 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
604 . Map.mapWithKey ngramsElementFromRepo
605
606 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
607 . filteredNodes
608
609 let fltrCount = length $ fltr ^. v_data . _NgramsTable
610
611 t2 <- getTime
612 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
613 . setScores (not scoresNeeded)
614 . selectAndPaginate
615 t3 <- getTime
616 liftBase $ hprint stderr
617 ("getTableNgrams total=" % hasTime
618 % " map1=" % hasTime
619 % " map2=" % hasTime
620 % " map3=" % hasTime
621 % " sql=" % (if scoresNeeded then "map2" else "map3")
622 % "\n"
623 ) t0 t3 t0 t1 t1 t2 t2 t3
624 pure $ toVersionedWithCount fltrCount tableMap3
625
626
627
628 scoresRecomputeTableNgrams :: forall env err m.
629 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
630 => NodeId -> TabType -> ListId -> m Int
631 scoresRecomputeTableNgrams nId tabType listId = do
632 tableMap <- getNgramsTableMap listId ngramsType
633 _ <- tableMap & v_data %%~ setScores
634 . Map.mapWithKey ngramsElementFromRepo
635
636 pure $ 1
637 where
638 ngramsType = ngramsTypeFromTabType tabType
639
640 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
641 setScores table = do
642 let ngrams_terms = table ^.. each . ne_ngrams
643 occurrences <- getOccByNgramsOnlyFast' nId
644 listId
645 ngramsType
646 ngrams_terms
647 let
648 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
649
650 pure $ table & each %~ setOcc
651
652
653
654
655 -- APIs
656
657 -- TODO: find a better place for the code above, All APIs stay here
658
659 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
660 deriving (Generic, Enum, Bounded, Read, Show)
661
662 instance FromHttpApiData OrderBy
663 where
664 parseUrlPiece "TermAsc" = pure TermAsc
665 parseUrlPiece "TermDesc" = pure TermDesc
666 parseUrlPiece "ScoreAsc" = pure ScoreAsc
667 parseUrlPiece "ScoreDesc" = pure ScoreDesc
668 parseUrlPiece _ = Left "Unexpected value of OrderBy"
669
670 instance ToHttpApiData OrderBy where
671 toUrlPiece = pack . show
672
673 instance ToParamSchema OrderBy
674 instance FromJSON OrderBy
675 instance ToJSON OrderBy
676 instance ToSchema OrderBy
677 instance Arbitrary OrderBy
678 where
679 arbitrary = elements [minBound..maxBound]
680
681 needsScores :: Maybe OrderBy -> Bool
682 needsScores (Just ScoreAsc) = True
683 needsScores (Just ScoreDesc) = True
684 needsScores _ = False
685
686 type TableNgramsApiGet = Summary " Table Ngrams API Get"
687 :> QueryParamR "ngramsType" TabType
688 :> QueryParamR "list" ListId
689 :> QueryParamR "limit" Limit
690 :> QueryParam "offset" Offset
691 :> QueryParam "listType" ListType
692 :> QueryParam "minTermSize" MinSize
693 :> QueryParam "maxTermSize" MaxSize
694 :> QueryParam "orderBy" OrderBy
695 :> QueryParam "search" Text
696 :> Get '[JSON] (VersionedWithCount NgramsTable)
697
698 type TableNgramsApiPut = Summary " Table Ngrams API Change"
699 :> QueryParamR "ngramsType" TabType
700 :> QueryParamR "list" ListId
701 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
702 :> Put '[JSON] (Versioned NgramsTablePatch)
703
704 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
705 :> QueryParamR "ngramsType" TabType
706 :> QueryParamR "list" ListId
707 :> "recompute" :> Post '[JSON] Int
708
709 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
710 :> QueryParamR "ngramsType" TabType
711 :> QueryParamR "list" ListId
712 :> Get '[JSON] Version
713
714 type TableNgramsApi = TableNgramsApiGet
715 :<|> TableNgramsApiPut
716 :<|> RecomputeScoresNgramsApiGet
717 :<|> "version" :> TableNgramsApiGetVersion
718 :<|> TableNgramsAsyncApi
719
720 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
721 :> "async"
722 :> "charts"
723 :> "update"
724 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
725
726 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
727 => NodeId
728 -> TabType
729 -> ListId
730 -> Limit
731 -> Maybe Offset
732 -> Maybe ListType
733 -> Maybe MinSize -> Maybe MaxSize
734 -> Maybe OrderBy
735 -> Maybe Text -- full text search
736 -> m (VersionedWithCount NgramsTable)
737 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
738 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
739 where
740 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
741
742
743
744 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
745 => NodeId
746 -> TabType
747 -> ListId
748 -> m Version
749 getTableNgramsVersion _nId _tabType listId = currentVersion listId
750
751
752
753 -- TODO: limit?
754 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
755 -- This line above looks like a waste of computation to finally get only the version.
756 -- See the comment about listNgramsChangedSince.
757
758
759 -- | Text search is deactivated for now for ngrams by doc only
760 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
761 => DocId -> TabType
762 -> ListId -> Limit -> Maybe Offset
763 -> Maybe ListType
764 -> Maybe MinSize -> Maybe MaxSize
765 -> Maybe OrderBy
766 -> Maybe Text -- full text search
767 -> m (VersionedWithCount NgramsTable)
768 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
769 ns <- selectNodesWithUsername NodeList userMaster
770 let ngramsType = ngramsTypeFromTabType tabType
771 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
772 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
773 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
774
775
776
777 apiNgramsTableCorpus :: ( GargServerC env err m
778 )
779 => NodeId -> ServerT TableNgramsApi m
780 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
781 :<|> tableNgramsPut
782 :<|> scoresRecomputeTableNgrams cId
783 :<|> getTableNgramsVersion cId
784 :<|> apiNgramsAsync cId
785
786 apiNgramsTableDoc :: ( GargServerC env err m
787 )
788 => DocId -> ServerT TableNgramsApi m
789 apiNgramsTableDoc dId = getTableNgramsDoc dId
790 :<|> tableNgramsPut
791 :<|> scoresRecomputeTableNgrams dId
792 :<|> getTableNgramsVersion dId
793 :<|> apiNgramsAsync dId
794
795 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
796 apiNgramsAsync _dId =
797 serveJobsAPI $
798 JobFunction $ \i log ->
799 let
800 log' x = do
801 printDebug "tableNgramsPostChartsAsync" x
802 liftBase $ log x
803 in tableNgramsPostChartsAsync i log'
804
805 -- Did the given list of ngrams changed since the given version?
806 -- The returned value is versioned boolean value, meaning that one always retrieve the
807 -- latest version.
808 -- If the given version is negative then one simply receive the latest version and True.
809 -- Using this function is more precise than simply comparing the latest version number
810 -- with the local version number. Indeed there might be no change to this particular list
811 -- and still the version number has changed because of other lists.
812 --
813 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
814 -- * currentVersion: good computation, good bandwidth, bad precision.
815 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
816 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
817 listNgramsChangedSince :: HasNodeStory env err m
818 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
819 listNgramsChangedSince listId ngramsType version
820 | version < 0 =
821 Versioned <$> currentVersion listId <*> pure True
822 | otherwise =
823 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)