]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FIX] Import List if label is in forms already
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , Version
73 , Versioned(..)
74 , VersionedWithCount(..)
75 , currentVersion
76 , listNgramsChangedSince
77 , MinSize, MaxSize, OrderBy, NgramsTable
78 , UpdateTableNgramsCharts
79 )
80 where
81
82 import Control.Concurrent
83 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
84 import Control.Monad.Reader
85 import Data.Aeson hiding ((.=))
86 import Data.Either (Either(..))
87 import Data.Foldable
88 import Data.Map.Strict (Map)
89 import Data.Maybe (fromMaybe)
90 import Data.Monoid
91 import Data.Ord (Down(..))
92 import Data.Patch.Class (Action(act), Transformable(..), ours)
93 import Data.Swagger hiding (version, patch)
94 import Data.Text (Text, isInfixOf, unpack, pack)
95 import Data.Text.Lazy.IO as DTL
96 import Formatting (hprint, int, (%))
97 import GHC.Generics (Generic)
98 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
99 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
100 import Gargantext.API.Admin.Types (HasSettings)
101 import Gargantext.API.Job
102 import Gargantext.API.Ngrams.Types
103 import Gargantext.API.Prelude
104 import Gargantext.Core.NodeStory
105 import Gargantext.Core.Mail.Types (HasMail)
106 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
107 import Gargantext.API.Ngrams.Tools
108 import Gargantext.Database.Action.Flow.Types
109 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
110 import Gargantext.Database.Admin.Config (userMaster)
111 import Gargantext.Database.Admin.Types.Node (NodeType(..))
112 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
113 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
114 import Gargantext.Database.Query.Table.Node (getNode)
115 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
116 import Gargantext.Database.Query.Table.Node.Select
117 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
118 import Gargantext.Prelude hiding (log)
119 import Gargantext.Prelude.Clock (hasTime, getTime)
120 import Prelude (error)
121 import Servant hiding (Patch)
122 import Gargantext.Utils.Jobs (serveJobsAPI)
123 import System.IO (stderr)
124 import Test.QuickCheck (elements)
125 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
126 import qualified Data.Aeson.Text as DAT
127 import qualified Data.List as List
128 import qualified Data.Map.Strict as Map
129 import qualified Data.Map.Strict.Patch as PM
130 import qualified Data.Set as S
131 import qualified Data.Set as Set
132 import qualified Gargantext.API.Metrics as Metrics
133 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
134
135 {-
136 -- TODO sequences of modifications (Patchs)
137 type NgramsIdPatch = Patch NgramsId NgramsPatch
138
139 ngramsPatch :: Int -> NgramsPatch
140 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
141
142 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
143 toEdit n p = Edit n p
144 ngramsIdPatch :: Patch NgramsId NgramsPatch
145 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
146 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
147 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
148 ]
149
150 -- applyPatchBack :: Patch -> IO Patch
151 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
152 -}
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
155 ------------------------------------------------------------------------
156
157 {-
158 -- TODO: Replace.old is ignored which means that if the current list
159 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
160 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
161 -- However this should not happen in non conflicting situations.
162 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
163 mkListsUpdate nt patches =
164 [ (ngramsTypeId nt, ng, listTypeId lt)
165 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
166 , lt <- patch ^.. patch_list . new
167 ]
168
169 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
170 -> NgramsType
171 -> NgramsTablePatch
172 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
173 mkChildrenGroups addOrRem nt patches =
174 [ (ngramsTypeId nt, parent, child)
175 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
176 , child <- patch ^.. patch_children . to addOrRem . folded
177 ]
178 -}
179
180 ------------------------------------------------------------------------
181
182 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
183 => m ()
184 saveNodeStory = do
185 saver <- view hasNodeStorySaver
186 liftBase $ do
187 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
188 saver
189 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
190
191
192 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
193 => m ()
194 saveNodeStoryImmediate = do
195 saver <- view hasNodeStoryImmediateSaver
196 liftBase $ do
197 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
198 saver
199 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
200
201
202 listTypeConflictResolution :: ListType -> ListType -> ListType
203 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
204
205
206 ngramsStatePatchConflictResolution
207 :: TableNgrams.NgramsType
208 -> NgramsTerm
209 -> ConflictResolutionNgramsPatch
210 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
211 -- = (ours, (const ours, ours), (False, False))
212 -- (False, False) mean here that Mod has always priority.
213 = (ours, (const ours, ours), (True, False))
214 -- (True, False) <- would mean priority to the left (same as ours).
215 -- undefined {- TODO think this through -}, listTypeConflictResolution)
216
217
218
219
220 -- Current state:
221 -- Insertions are not considered as patches,
222 -- they do not extend history,
223 -- they do not bump version.
224 insertNewOnly :: a -> Maybe b -> a
225 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
226 -- TODO error handling
227
228 {- unused
229 -- TODO refactor with putListNgrams
230 copyListNgrams :: RepoCmdM env err m
231 => NodeId -> NodeId -> NgramsType
232 -> m ()
233 copyListNgrams srcListId dstListId ngramsType = do
234 var <- view repoVar
235 liftBase $ modifyMVar_ var $
236 pure . (r_state . at ngramsType %~ (Just . f . something))
237 saveNodeStory
238 where
239 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
240 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
241
242 -- TODO refactor with putListNgrams
243 -- The list must be non-empty!
244 -- The added ngrams must be non-existent!
245 addListNgrams :: RepoCmdM env err m
246 => NodeId -> NgramsType
247 -> [NgramsElement] -> m ()
248 addListNgrams listId ngramsType nes = do
249 var <- view repoVar
250 liftBase $ modifyMVar_ var $
251 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
252 saveNodeStory
253 where
254 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
255 -}
256
257 -- | TODO: incr the Version number
258 -- && should use patch
259 -- UNSAFE
260
261 setListNgrams :: HasNodeStory env err m
262 => NodeId
263 -> TableNgrams.NgramsType
264 -> Map NgramsTerm NgramsRepoElement
265 -> m ()
266 setListNgrams listId ngramsType ns = do
267 -- printDebug "[setListNgrams]" (listId, ngramsType)
268 getter <- view hasNodeStory
269 var <- liftBase $ (getter ^. nse_getter) [listId]
270 liftBase $ modifyMVar_ var $
271 pure . ( unNodeStory
272 . at listId . _Just
273 . a_state
274 . at ngramsType
275 .~ Just ns
276 )
277 saveNodeStory
278
279
280 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
281 newNgramsFromNgramsStatePatch p =
282 [ text2ngrams (unNgramsTerm n)
283 | (n,np) <- p ^.. _PatchMap
284 -- . each . _PatchMap
285 . each . _NgramsTablePatch
286 . _PatchMap . ifolded . withIndex
287 , _ <- np ^.. patch_new . _Just
288 ]
289
290
291
292
293 commitStatePatch :: (HasNodeStory env err m, HasMail env)
294 => ListId
295 -> Versioned NgramsStatePatch'
296 -> m (Versioned NgramsStatePatch')
297 commitStatePatch listId (Versioned _p_version p) = do
298 -- printDebug "[commitStatePatch]" listId
299 var <- getNodeStoryVar [listId]
300 vq' <- liftBase $ modifyMVar var $ \ns -> do
301 let
302 a = ns ^. unNodeStory . at listId . _Just
303 -- apply patches from version p_version to a ^. a_version
304 -- TODO Check this
305 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
306 q = mconcat $ a ^. a_history
307
308 printDebug "transformWith" (p,q)
309
310 let
311 (p', q') = transformWith ngramsStatePatchConflictResolution p q
312 a' = a & a_version +~ 1
313 & a_state %~ act p'
314 & a_history %~ (p' :)
315
316 {-
317 -- Ideally we would like to check these properties. However:
318 -- * They should be checked only to debug the code. The client data
319 -- should be able to trigger these.
320 -- * What kind of error should they throw (we are in IO here)?
321 -- * Should we keep modifyMVar?
322 -- * Should we throw the validation in an Exception, catch it around
323 -- modifyMVar and throw it back as an Error?
324 assertValid $ transformable p q
325 assertValid $ applicable p' (r ^. r_state)
326 -}
327 printDebug "[commitStatePatch] a version" (a ^. a_version)
328 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
329 pure ( ns & unNodeStory . at listId .~ (Just a')
330 , Versioned (a' ^. a_version) q'
331 )
332 saveNodeStory
333 -- Save new ngrams
334 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
335
336 pure vq'
337
338
339
340 -- This is a special case of tableNgramsPut where the input patch is empty.
341 tableNgramsPull :: HasNodeStory env err m
342 => ListId
343 -> TableNgrams.NgramsType
344 -> Version
345 -> m (Versioned NgramsTablePatch)
346 tableNgramsPull listId ngramsType p_version = do
347 printDebug "[tableNgramsPull]" (listId, ngramsType)
348 var <- getNodeStoryVar [listId]
349 r <- liftBase $ readMVar var
350
351 let
352 a = r ^. unNodeStory . at listId . _Just
353 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
354 q_table = q ^. _PatchMap . at ngramsType . _Just
355
356 pure (Versioned (a ^. a_version) q_table)
357
358
359
360
361 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
362 -- Apply the given patch to the DB and returns the patch to be applied on the
363 -- client.
364 -- TODO-ACCESS check
365 tableNgramsPut :: ( HasNodeStory env err m
366 , HasInvalidError err
367 , HasSettings env
368 , HasMail env
369 )
370 => TabType
371 -> ListId
372 -> Versioned NgramsTablePatch
373 -> m (Versioned NgramsTablePatch)
374 tableNgramsPut tabType listId (Versioned p_version p_table)
375 | p_table == mempty = do
376 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
377 let ngramsType = ngramsTypeFromTabType tabType
378 tableNgramsPull listId ngramsType p_version
379
380 | otherwise = do
381 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
382 let ngramsType = ngramsTypeFromTabType tabType
383 (p, p_validity) = PM.singleton ngramsType p_table
384
385 assertValid p_validity
386
387 ret <- commitStatePatch listId (Versioned p_version p)
388 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
389
390 pure ret
391
392
393
394 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
395 , FlowCmdM env err m
396 , HasNodeError err
397 , HasSettings env
398 )
399 => UpdateTableNgramsCharts
400 -> (JobLog -> m ())
401 -> m JobLog
402 tableNgramsPostChartsAsync utn logStatus = do
403 let tabType = utn ^. utn_tab_type
404 let listId = utn ^. utn_list_id
405
406 node <- getNode listId
407 let nId = node ^. node_id
408 _uId = node ^. node_user_id
409 mCId = node ^. node_parent_id
410
411 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
412 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
413
414 case mCId of
415 Nothing -> do
416 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
417 pure $ jobLogFail $ jobLogInit 1
418 Just cId -> do
419 case tabType of
420 Authors -> do
421 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
422 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
423 logRef
424 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
425 logRefSuccess
426
427 getRef
428 Institutes -> do
429 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
430 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
431 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
432 logRef
433 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
434 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
435 logRefSuccess
436 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
437 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
438 logRefSuccess
439 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
440 logRefSuccess
441
442 getRef
443 Sources -> do
444 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
445 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
446 logRef
447 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
448 logRefSuccess
449
450 getRef
451 Terms -> do
452 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
453 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
454 logRef
455 {-
456 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
457 logRefSuccess
458 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
459 logRefSuccess
460 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
461 logRefSuccess
462 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
463 logRefSuccess
464 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
465 logRefSuccess
466 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
467 -}
468 logRefSuccess
469
470 getRef
471 _ -> do
472 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
473 pure $ jobLogFail $ jobLogInit 1
474
475 {-
476 { _ne_list :: ListType
477 If we merge the parents/children we can potentially create cycles!
478 , _ne_parent :: Maybe NgramsTerm
479 , _ne_children :: MSet NgramsTerm
480 }
481 -}
482
483 getNgramsTableMap :: HasNodeStory env err m
484 => NodeId
485 -> TableNgrams.NgramsType
486 -> m (Versioned NgramsTableMap)
487 getNgramsTableMap nodeId ngramsType = do
488 v <- getNodeStoryVar [nodeId]
489 repo <- liftBase $ readMVar v
490 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
491 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
492
493
494 dumpJsonTableMap :: HasNodeStory env err m
495 => Text
496 -> NodeId
497 -> TableNgrams.NgramsType
498 -> m ()
499 dumpJsonTableMap fpath nodeId ngramsType = do
500 m <- getNgramsTableMap nodeId ngramsType
501 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
502 pure ()
503
504
505 type MinSize = Int
506 type MaxSize = Int
507
508 -- | TODO Errors management
509 -- TODO: polymorphic for Annuaire or Corpus or ...
510 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
511 -- TODO: should take only one ListId
512
513
514 getTableNgrams :: forall env err m.
515 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
516 => NodeType -> NodeId -> TabType
517 -> ListId -> Limit -> Maybe Offset
518 -> Maybe ListType
519 -> Maybe MinSize -> Maybe MaxSize
520 -> Maybe OrderBy
521 -> (NgramsTerm -> Bool)
522 -> m (VersionedWithCount NgramsTable)
523 getTableNgrams _nType nId tabType listId limit_ offset
524 listType minSize maxSize orderBy searchQuery = do
525
526 t0 <- getTime
527 -- lIds <- selectNodesWithUsername NodeList userMaster
528 let
529 ngramsType = ngramsTypeFromTabType tabType
530 offset' = maybe 0 identity offset
531 listType' = maybe (const True) (==) listType
532 minSize' = maybe (const True) (<=) minSize
533 maxSize' = maybe (const True) (>=) maxSize
534
535 selected_node n = minSize' s
536 && maxSize' s
537 && searchQuery (n ^. ne_ngrams)
538 && listType' (n ^. ne_list)
539 where
540 s = n ^. ne_size
541
542 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
543
544 ---------------------------------------
545 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
546 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
547 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
548 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
549 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
550
551 ---------------------------------------
552 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
553 filteredNodes tableMap = rootOf <$> list & filter selected_node
554 where
555 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
556 (tableMap ^. at r)
557 )
558 (ne ^. ne_root)
559 list = tableMap ^.. each
560
561 ---------------------------------------
562 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
563 selectAndPaginate tableMap = roots <> inners
564 where
565 list = tableMap ^.. each
566 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
567 (tableMap ^. at r)
568 )
569 (ne ^. ne_root)
570 selected_nodes = list & take limit_
571 . drop offset'
572 . filter selected_node
573 . sortOnOrder orderBy
574 roots = rootOf <$> selected_nodes
575 rootsSet = Set.fromList (_ne_ngrams <$> roots)
576 inners = list & filter (selected_inner rootsSet)
577
578 ---------------------------------------
579 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
580 setScores False table = pure table
581 setScores True table = do
582 let ngrams_terms = table ^.. each . ne_ngrams
583 -- printDebug "ngrams_terms" ngrams_terms
584 t1 <- getTime
585 occurrences <- getOccByNgramsOnlyFast nId
586 listId
587 ngramsType
588 --printDebug "occurrences" occurrences
589 t2 <- getTime
590 liftBase $ hprint stderr
591 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
592 (length ngrams_terms) t1 t2
593 let
594 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
595
596 pure $ table & each %~ setOcc
597 ---------------------------------------
598
599 -- lists <- catMaybes <$> listsWith userMaster
600 -- trace (show lists) $
601 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
602
603
604 let scoresNeeded = needsScores orderBy
605 tableMap1 <- getNgramsTableMap listId ngramsType
606 t1 <- getTime
607
608 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
609 . Map.mapWithKey ngramsElementFromRepo
610
611 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
612 . filteredNodes
613
614 let fltrCount = length $ fltr ^. v_data . _NgramsTable
615
616 t2 <- getTime
617 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
618 . setScores (not scoresNeeded)
619 . selectAndPaginate
620 t3 <- getTime
621 liftBase $ hprint stderr
622 ("getTableNgrams total=" % hasTime
623 % " map1=" % hasTime
624 % " map2=" % hasTime
625 % " map3=" % hasTime
626 % " sql=" % (if scoresNeeded then "map2" else "map3")
627 % "\n"
628 ) t0 t3 t0 t1 t1 t2 t2 t3
629 pure $ toVersionedWithCount fltrCount tableMap3
630
631
632
633 scoresRecomputeTableNgrams :: forall env err m.
634 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
635 => NodeId -> TabType -> ListId -> m Int
636 scoresRecomputeTableNgrams nId tabType listId = do
637 tableMap <- getNgramsTableMap listId ngramsType
638 _ <- tableMap & v_data %%~ setScores
639 . Map.mapWithKey ngramsElementFromRepo
640
641 pure $ 1
642 where
643 ngramsType = ngramsTypeFromTabType tabType
644
645 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
646 setScores table = do
647 occurrences <- getOccByNgramsOnlyFast nId
648 listId
649 ngramsType
650 let
651 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
652
653 pure $ table & each %~ setOcc
654
655
656 -- APIs
657
658 -- TODO: find a better place for the code above, All APIs stay here
659
660 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
661 deriving (Generic, Enum, Bounded, Read, Show)
662
663 instance FromHttpApiData OrderBy
664 where
665 parseUrlPiece "TermAsc" = pure TermAsc
666 parseUrlPiece "TermDesc" = pure TermDesc
667 parseUrlPiece "ScoreAsc" = pure ScoreAsc
668 parseUrlPiece "ScoreDesc" = pure ScoreDesc
669 parseUrlPiece _ = Left "Unexpected value of OrderBy"
670
671 instance ToHttpApiData OrderBy where
672 toUrlPiece = pack . show
673
674 instance ToParamSchema OrderBy
675 instance FromJSON OrderBy
676 instance ToJSON OrderBy
677 instance ToSchema OrderBy
678 instance Arbitrary OrderBy
679 where
680 arbitrary = elements [minBound..maxBound]
681
682 needsScores :: Maybe OrderBy -> Bool
683 needsScores (Just ScoreAsc) = True
684 needsScores (Just ScoreDesc) = True
685 needsScores _ = False
686
687 type TableNgramsApiGet = Summary " Table Ngrams API Get"
688 :> QueryParamR "ngramsType" TabType
689 :> QueryParamR "list" ListId
690 :> QueryParamR "limit" Limit
691 :> QueryParam "offset" Offset
692 :> QueryParam "listType" ListType
693 :> QueryParam "minTermSize" MinSize
694 :> QueryParam "maxTermSize" MaxSize
695 :> QueryParam "orderBy" OrderBy
696 :> QueryParam "search" Text
697 :> Get '[JSON] (VersionedWithCount NgramsTable)
698
699 type TableNgramsApiPut = Summary " Table Ngrams API Change"
700 :> QueryParamR "ngramsType" TabType
701 :> QueryParamR "list" ListId
702 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
703 :> Put '[JSON] (Versioned NgramsTablePatch)
704
705 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
706 :> QueryParamR "ngramsType" TabType
707 :> QueryParamR "list" ListId
708 :> "recompute" :> Post '[JSON] Int
709
710 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
711 :> QueryParamR "ngramsType" TabType
712 :> QueryParamR "list" ListId
713 :> Get '[JSON] Version
714
715 type TableNgramsApi = TableNgramsApiGet
716 :<|> TableNgramsApiPut
717 :<|> RecomputeScoresNgramsApiGet
718 :<|> "version" :> TableNgramsApiGetVersion
719 :<|> TableNgramsAsyncApi
720
721 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
722 :> "async"
723 :> "charts"
724 :> "update"
725 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
726
727 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
728 => NodeId
729 -> TabType
730 -> ListId
731 -> Limit
732 -> Maybe Offset
733 -> Maybe ListType
734 -> Maybe MinSize -> Maybe MaxSize
735 -> Maybe OrderBy
736 -> Maybe Text -- full text search
737 -> m (VersionedWithCount NgramsTable)
738 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
739 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
740 where
741 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
742
743
744
745 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
746 => NodeId
747 -> TabType
748 -> ListId
749 -> m Version
750 getTableNgramsVersion _nId _tabType listId = currentVersion listId
751
752
753
754 -- TODO: limit?
755 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
756 -- This line above looks like a waste of computation to finally get only the version.
757 -- See the comment about listNgramsChangedSince.
758
759
760 -- | Text search is deactivated for now for ngrams by doc only
761 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
762 => DocId -> TabType
763 -> ListId -> Limit -> Maybe Offset
764 -> Maybe ListType
765 -> Maybe MinSize -> Maybe MaxSize
766 -> Maybe OrderBy
767 -> Maybe Text -- full text search
768 -> m (VersionedWithCount NgramsTable)
769 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
770 ns <- selectNodesWithUsername NodeList userMaster
771 let ngramsType = ngramsTypeFromTabType tabType
772 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
773 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
774 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
775
776
777
778 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
779 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
780 :<|> tableNgramsPut
781 :<|> scoresRecomputeTableNgrams cId
782 :<|> getTableNgramsVersion cId
783 :<|> apiNgramsAsync cId
784
785 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
786 apiNgramsTableDoc dId = getTableNgramsDoc dId
787 :<|> tableNgramsPut
788 :<|> scoresRecomputeTableNgrams dId
789 :<|> getTableNgramsVersion dId
790 :<|> apiNgramsAsync dId
791
792 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
793 apiNgramsAsync _dId =
794 serveJobsAPI TableNgramsJob $ \i log ->
795 let
796 log' x = do
797 printDebug "tableNgramsPostChartsAsync" x
798 liftBase $ log x
799 in tableNgramsPostChartsAsync i log'
800
801 -- Did the given list of ngrams changed since the given version?
802 -- The returned value is versioned boolean value, meaning that one always retrieve the
803 -- latest version.
804 -- If the given version is negative then one simply receive the latest version and True.
805 -- Using this function is more precise than simply comparing the latest version number
806 -- with the local version number. Indeed there might be no change to this particular list
807 -- and still the version number has changed because of other lists.
808 --
809 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
810 -- * currentVersion: good computation, good bandwidth, bad precision.
811 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
812 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
813 listNgramsChangedSince :: HasNodeStory env err m
814 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
815 listNgramsChangedSince listId ngramsType version
816 | version < 0 =
817 Versioned <$> currentVersion listId <*> pure True
818 | otherwise =
819 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)