]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[ngrams] refactor some code related to getNgramsTable
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
90 import Data.Foldable
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
93 import Data.Monoid
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Job
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Mail.Types (HasMail)
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Database.Action.Flow.Types
112 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
113 import Gargantext.Database.Admin.Config (userMaster)
114 import Gargantext.Database.Admin.Types.Node (NodeType(..))
115 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
116 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
117 import Gargantext.Database.Query.Table.Node (getNode)
118 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
119 import Gargantext.Database.Query.Table.Node.Select
120 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
121 import Gargantext.Prelude hiding (log)
122 import Gargantext.Prelude.Clock (hasTime, getTime)
123 import Prelude (error)
124 import Servant hiding (Patch)
125 import Gargantext.Utils.Jobs (serveJobsAPI)
126 import System.IO (stderr)
127 import Test.QuickCheck (elements)
128 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
129 import qualified Data.Aeson.Text as DAT
130 import qualified Data.List as List
131 import qualified Data.Map.Strict as Map
132 import qualified Data.Map.Strict.Patch as PM
133 import qualified Data.Set as S
134 import qualified Data.Set as Set
135 import qualified Gargantext.API.Metrics as Metrics
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
137
138 {-
139 -- TODO sequences of modifications (Patchs)
140 type NgramsIdPatch = Patch NgramsId NgramsPatch
141
142 ngramsPatch :: Int -> NgramsPatch
143 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
144
145 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
146 toEdit n p = Edit n p
147 ngramsIdPatch :: Patch NgramsId NgramsPatch
148 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
149 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
150 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
151 ]
152
153 -- applyPatchBack :: Patch -> IO Patch
154 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
155 -}
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
159
160 {-
161 -- TODO: Replace.old is ignored which means that if the current list
162 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
163 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
164 -- However this should not happen in non conflicting situations.
165 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
166 mkListsUpdate nt patches =
167 [ (ngramsTypeId nt, ng, listTypeId lt)
168 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , lt <- patch ^.. patch_list . new
170 ]
171
172 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
173 -> NgramsType
174 -> NgramsTablePatch
175 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
176 mkChildrenGroups addOrRem nt patches =
177 [ (ngramsTypeId nt, parent, child)
178 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
179 , child <- patch ^.. patch_children . to addOrRem . folded
180 ]
181 -}
182
183 ------------------------------------------------------------------------
184
-- | Fetch the node-story saver action from the environment and run it in the
-- base 'IO' monad, printing a marker line on stdout before and after the run.
saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
              => m ()
saveNodeStory =
  view hasNodeStorySaver >>= \runSaver ->
    liftBase $
         Gargantext.Prelude.putStrLn "---- Running node story saver ----"
      >> runSaver
      >> Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
193
194
-- | Fetch the immediate node-story saver action from the environment and run
-- it in the base 'IO' monad, printing a marker line on stdout before and
-- after the run.
saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
                       => m ()
saveNodeStoryImmediate =
  view hasNodeStoryImmediateSaver >>= \runSaver ->
    liftBase $
         Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
      >> runSaver
      >> Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
203
204
-- | Placeholder for resolving a conflict between two list types.
-- Not implemented: forcing the result evaluates 'undefined'.
-- TODO Use Map User ListType
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution = \_ _ -> undefined
207
208
-- | Conflict-resolution policy handed to 'transformWith' when committing a
-- state patch. Both arguments are currently ignored: the same policy is used
-- for every ngrams type and every term.
--
-- The last component is a pair of flags. The previous setting was
-- @(False, False)@, meaning that Mod always has priority; the current
-- @(True, False)@ gives priority to the left (same as 'ours').
-- TODO think this through (see also 'listTypeConflictResolution').
ngramsStatePatchConflictResolution
  :: TableNgrams.NgramsType
  -> NgramsTerm
  -> ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution _ngramsType _ngramsTerm =
  (ours, pairResolution, priorityFlags)
  where
    pairResolution = (const ours, ours)
    priorityFlags  = (True, False)
219
220
221
222
-- | Return the first argument when the second is 'Nothing'; otherwise fail
-- with 'error' (an already-present value is treated as impossible).
--
-- Current state:
--   * insertions are not considered as patches,
--   * they do not extend history,
--   * they do not bump version.
--
-- TODO error handling
insertNewOnly :: a -> Maybe b -> a
insertNewOnly fresh existing =
  case existing of
    Nothing -> fresh
    Just _  -> error "insertNewOnly: impossible"
230
231 {- unused
232 -- TODO refactor with putListNgrams
233 copyListNgrams :: RepoCmdM env err m
234 => NodeId -> NodeId -> NgramsType
235 -> m ()
236 copyListNgrams srcListId dstListId ngramsType = do
237 var <- view repoVar
238 liftBase $ modifyMVar_ var $
239 pure . (r_state . at ngramsType %~ (Just . f . something))
240 saveNodeStory
241 where
242 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
243 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
244
245 -- TODO refactor with putListNgrams
246 -- The list must be non-empty!
247 -- The added ngrams must be non-existent!
248 addListNgrams :: RepoCmdM env err m
249 => NodeId -> NgramsType
250 -> [NgramsElement] -> m ()
251 addListNgrams listId ngramsType nes = do
252 var <- view repoVar
253 liftBase $ modifyMVar_ var $
254 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
255 saveNodeStory
256 where
257 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
258 -}
259
-- | Overwrite the ngrams map stored for @ngramsType@ in the archive of
-- @listId@, then trigger 'saveNodeStory'.
--
-- UNSAFE: the archive version is not incremented and no patch is recorded.
-- The update goes through @at listId . _Just@, so it only touches an archive
-- that is already present in the node story.
--
-- TODO: incr the Version number && should use patch
setListNgrams :: HasNodeStory env err m
              => NodeId
              -> TableNgrams.NgramsType
              -> Map NgramsTerm NgramsRepoElement
              -> m ()
setListNgrams listId ngramsType ns = do
  -- printDebug "[setListNgrams]" (listId, ngramsType)
  storyEnv <- view hasNodeStory
  storyVar <- liftBase $ (storyEnv ^. nse_getter) [listId]
  liftBase $ modifyMVar_ storyVar (pure . overwrite)
  saveNodeStory
  where
    -- Replace the whole table for this ngrams type.
    overwrite story =
      ( unNodeStory . at listId . _Just . a_state . at ngramsType .~ Just ns ) story
281
282
-- | Collect, as 'Ngrams', every term of the state patch whose per-term patch
-- carries a @patch_new@ value. One entry is produced per such value.
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch statePatch = do
  (term, termPatch) <- statePatch ^.. _PatchMap
                                    . each . _NgramsTablePatch
                                    . _PatchMap . ifolded . withIndex
  _ <- termPatch ^.. patch_new . _Just
  pure $ text2ngrams (unNgramsTerm term)
292
293
294
295
-- | Commit a state patch on the archive of @listId@.
--
-- Under the node-story 'MVar' the incoming patch is transformed (with
-- 'ngramsStatePatchConflictResolution') against the concatenation of the
-- archive's history. The transformed patch is then applied to the state and
-- pushed onto the history, and the version is bumped by one. Afterwards the
-- node story is saved and the ngrams introduced by the original patch are
-- inserted. The result is the new version paired with the transformed
-- counterpart of the history patch.
--
-- The client-supplied version is currently ignored: the whole history is
-- used rather than only the part newer than that version (TODO Check this).
commitStatePatch :: (HasNodeStory env err m, HasMail env)
                 => ListId
                 -> Versioned NgramsStatePatch'
                 -> m (Versioned NgramsStatePatch')
commitStatePatch listId (Versioned _p_version p) = do
  -- printDebug "[commitStatePatch]" listId
  storyVar <- getNodeStoryVar [listId]
  committed <- liftBase $ modifyMVar storyVar $ \story -> do
    let
      archive = story ^. unNodeStory . at listId . _Just
      -- Candidate restriction to the patches newer than the client version:
      -- mconcat $ take (archive ^. a_version - p_version) (archive ^. a_history)
      history = mconcat $ archive ^. a_history

    printDebug "transformWith" (p, history)

    let
      (p', q') = transformWith ngramsStatePatchConflictResolution p history
      archive' = archive & a_version +~ 1
                         & a_state   %~ act p'
                         & a_history %~ (p' :)

    {-
    -- Ideally we would like to check these properties. However:
    -- * They should be checked only to debug the code. The client data
    --   should be able to trigger these.
    -- * What kind of error should they throw (we are in IO here)?
    -- * Should we keep modifyMVar?
    -- * Should we throw the validation in an Exception, catch it around
    --   modifyMVar and throw it back as an Error?
    assertValid $ transformable p q
    assertValid $ applicable p' (r ^. r_state)
    -}
    printDebug "[commitStatePatch] a version" (archive ^. a_version)
    printDebug "[commitStatePatch] a' version" (archive' ^. a_version)
    pure ( story & unNodeStory . at listId .~ Just archive'
         , Versioned (archive' ^. a_version) q'
         )
  saveNodeStory
  -- Save new ngrams
  _ <- insertNgrams (newNgramsFromNgramsStatePatch p)

  pure committed
340
341
342
-- | Special case of 'tableNgramsPut' where the input patch is empty.
--
-- Reads the archive of @listId@, concatenates the first
-- @a_version - p_version@ entries of its history and returns the part of
-- that patch concerning @ngramsType@, tagged with the archive's current
-- version.
tableNgramsPull :: HasNodeStory env err m
                => ListId
                -> TableNgrams.NgramsType
                -> Version
                -> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
  printDebug "[tableNgramsPull]" (listId, ngramsType)
  story <- getNodeStoryVar [listId] >>= liftBase . readMVar

  let
    archive    = story ^. unNodeStory . at listId . _Just
    missed     = archive ^. a_version - p_version
    statePatch = mconcat $ take missed (archive ^. a_history)
    tablePatch = statePatch ^. _PatchMap . at ngramsType . _Just

  pure $ Versioned (archive ^. a_version) tablePatch
360
361
362
363
-- | Apply the given patch to the stored table and return the patch to be
-- applied on the client.
--
-- An empty patch degenerates to 'tableNgramsPull'. Otherwise the table patch
-- is wrapped into a state patch for the ngrams type of @tabType@, validated
-- with 'assertValid', committed with 'commitStatePatch', and the part of the
-- resulting patch for that ngrams type is returned.
--
-- TODO-ACCESS check
tableNgramsPut :: ( HasNodeStory    env err m
                  , HasInvalidError     err
                  , HasSettings     env
                  , HasMail         env
                  )
               => TabType
               -> ListId
               -> Versioned NgramsTablePatch
               -> m (Versioned NgramsTablePatch)
tableNgramsPut tabType listId (Versioned p_version p_table)
  | p_table == mempty = do
      printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
      tableNgramsPull listId ngramsType p_version

  | otherwise = do
      printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
      let (p, p_validity) = PM.singleton ngramsType p_table

      assertValid p_validity

      commitStatePatch listId (Versioned p_version p)
        <&> v_data %~ view (_PatchMap . at ngramsType . _Just)
  where
    ngramsType = ngramsTypeFromTabType tabType
394
395
396
-- | Asynchronous job body refreshing the charts attached to a list.
--
-- The list node is fetched with 'getNode'; its parent id is used as the
-- corpus id. Without a parent the job fails immediately. Otherwise the
-- update performed depends on the tab type:
--
--   * @Authors@ / @Sources@: 'Metrics.updatePie' (1 step);
--   * @Institutes@: 'Metrics.updateTree' for @StopTerm@, @CandidateTerm@ and
--     @MapTerm@ (3 steps);
--   * @Terms@: the metric updates are currently commented out;
--   * any other tab: nothing is updated and a failed job log is returned.
tableNgramsPostChartsAsync :: ( HasNodeStory env err m
                              , FlowCmdM     env err m
                              , HasNodeError err
                              , HasSettings  env
                              )
                           => UpdateTableNgramsCharts
                           -> (JobLog -> m ())
                           -> m JobLog
tableNgramsPostChartsAsync utn logStatus = do
  let tabType = utn ^. utn_tab_type
      listId  = utn ^. utn_list_id

  node <- getNode listId
  let nId  = node ^. node_id
      _uId = node ^. node_user_id

      -- Single-step job shared by the Authors and Sources tabs.
      pieOnly cId = do
        (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
        logRef
        _ <- Metrics.updatePie cId (Just listId) tabType Nothing
        logRefSuccess
        getRef

  case node ^. node_parent_id of
    Nothing -> do
      printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
      pure $ jobLogFail $ jobLogInit 1

    Just cId -> case tabType of
      Authors -> pieOnly cId
      Sources -> pieOnly cId

      Institutes -> do
        (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
        logRef
        -- One tree update per list type, each followed by a success tick.
        forM_ [StopTerm, CandidateTerm, MapTerm] $ \lt -> do
          _ <- Metrics.updateTree cId (Just listId) tabType lt
          logRefSuccess
        getRef

      Terms -> do
        -- NOTE(review): the job log is initialised with 6 steps but, since
        -- the six updates below are commented out, only one success is
        -- recorded. Confirm whether the step count should be 1 until the
        -- updates are re-enabled.
        (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
        logRef
        {-
        _ <- Metrics.updateChart cId (Just listId) tabType Nothing
        logRefSuccess
        _ <- Metrics.updatePie cId (Just listId) tabType Nothing
        logRefSuccess
        _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
        logRefSuccess
        _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
        logRefSuccess
        _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
        logRefSuccess
        _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
        -}
        logRefSuccess
        getRef

      _ -> do
        printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
        pure $ jobLogFail $ jobLogInit 1
477
478 {-
479 { _ne_list :: ListType
480 If we merge the parents/children we can potentially create cycles!
481 , _ne_parent :: Maybe NgramsTerm
482 , _ne_children :: MSet NgramsTerm
483 }
484 -}
485
-- | Read the node story of @nodeId@ and return the stored ngrams map for
-- @ngramsType@ together with the archive version. Both are looked up through
-- @at nodeId . _Just@, so a missing entry yields the monoidal default of the
-- viewed value.
getNgramsTableMap :: HasNodeStory env err m
                  => NodeId
                  -> TableNgrams.NgramsType
                  -> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
  story <- getNodeStoryVar [nodeId] >>= liftBase . readMVar
  let
    version  = story ^. unNodeStory . at nodeId . _Just . a_version
    tableMap = story ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just
  pure (Versioned version tableMap)
495
496
-- | Write the versioned ngrams table map of @nodeId@ / @ngramsType@ to the
-- file at @fpath@, encoded as JSON text.
dumpJsonTableMap :: HasNodeStory env err m
                 => Text
                 -> NodeId
                 -> TableNgrams.NgramsType
                 -> m ()
dumpJsonTableMap fpath nodeId ngramsType =
  getNgramsTableMap nodeId ngramsType
    >>= liftBase . DTL.writeFile (unpack fpath) . DAT.encodeToLazyText
506
507
-- | Lower bound on an element's @ne_size@ (see 'getTableNgrams').
type MinSize = Int
-- | Upper bound on an element's @ne_size@ (see 'getTableNgrams').
type MaxSize = Int

-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
--
-- The table of @listId@ is loaded with 'getNgramsTable''. An element is
-- selected when it satisfies all of: the optional size bounds, the
-- @searchQuery@ predicate on its term, and the optional list type. Selected
-- elements are sorted according to @orderBy@ (no ordering is treated as
-- @ScoreDesc@), paginated with @offset@ / @limit_@, replaced by their root
-- when they have one, and completed with every element whose root is among
-- the returned roots. The result also carries the count of filtered nodes.
--
-- Timings of the successive phases are printed on stderr.
--
-- TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
-- TODO: should take only one ListId
getTableNgrams :: forall env err m.
                  (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
               => NodeType -> NodeId -> TabType
               -> ListId   -> Limit  -> Maybe Offset
               -> Maybe ListType
               -> Maybe MinSize -> Maybe MaxSize
               -> Maybe OrderBy
               -> (NgramsTerm -> Bool)
               -> m (VersionedWithCount NgramsTable)
getTableNgrams _nType nId tabType listId limit_ offset
               listType minSize maxSize orderBy searchQuery = do

  t0 <- getTime
  -- lIds <- selectNodesWithUsername NodeList userMaster
  let
    ngramsType = ngramsTypeFromTabType tabType
    offset'    = fromMaybe 0 offset
    -- A missing criterion accepts everything.
    listType'  = maybe (const True) (==) listType
    minSize'   = maybe (const True) (<=) minSize
    maxSize'   = maybe (const True) (>=) maxSize

    selected_node n = minSize'    s
                   && maxSize'    s
                   && searchQuery (n ^. ne_ngrams)
                   && listType'   (n ^. ne_list)
      where
        s = n ^. ne_size

    selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)

    ---------------------------------------
    sortOnOrder Nothing          = sortOnOrder (Just ScoreDesc)
    sortOnOrder (Just TermAsc)   = List.sortOn $ view ne_ngrams
    sortOnOrder (Just TermDesc)  = List.sortOn $ Down . view ne_ngrams
    sortOnOrder (Just ScoreAsc)  = List.sortOn $ view ne_occurrences
    sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences

    ---------------------------------------
    -- Elements used for the total count: every element is first replaced by
    -- its root, then filtered.
    -- NOTE(review): 'selectAndPaginate' filters before mapping to roots, so
    -- this count may not match the number of paginated rows — confirm which
    -- order is intended.
    filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
    filteredNodes tableMap = filter selected_node (rootOf <$> list)
      where
        rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
                                              (tableMap ^. at r)
                             )
                             (ne ^. ne_root)
        list = tableMap ^.. each

    ---------------------------------------
    selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
    selectAndPaginate tableMap = roots <> inners
      where
        list = tableMap ^.. each
        rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
                                              (tableMap ^. at r)
                             )
                             (ne ^. ne_root)
        selected_nodes = list & take limit_
                              . drop offset'
                              . filter selected_node
                              . sortOnOrder orderBy
        roots    = rootOf <$> selected_nodes
        rootsSet = Set.fromList (_ne_ngrams <$> roots)
        inners   = list & filter (selected_inner rootsSet)

  ---------------------------------------

  -- lists <- catMaybes <$> listsWith userMaster
  -- trace (show lists) $
  -- getNgramsTableMap ({-lists <>-} listIds) ngramsType

  let scoresNeeded = needsScores orderBy
  t1 <- getTime

  tableMap2 <- getNgramsTable' nId listId ngramsType orderBy

  -- Scores are set here only when 'getNgramsTable'' did not already set them.
  let fmapScores = fmap NgramsTable
                 . setNgramsTableScores nId listId ngramsType (not scoresNeeded)

  -- Setting scores never adds or removes elements, so the count is taken
  -- directly from the filtered list instead of scoring it first: this avoids
  -- a second occurrences lookup whose result was only used for its length.
  let fltrCount = length $ filteredNodes (tableMap2 ^. v_data)

  t2 <- getTime
  tableMap3 <- tableMap2 & v_data %%~ fmapScores . selectAndPaginate
  t3 <- getTime
  liftBase $ do
    hprint stderr
      ("getTableNgrams total=" % hasTime
        % " map1=" % hasTime
        % " map2=" % hasTime
        % " map3=" % hasTime
        % " sql=" % (if scoresNeeded then "map2" else "map3")
        % "\n"
      ) t0 t3 t0 t1 t1 t2 t2 t3

  -- printDebug "[getTableNgrams] tableMap3" $ show tableMap3
  pure $ toVersionedWithCount fltrCount tableMap3
616
617
-- | Load the stored table of @listId@ for @ngramsType@, convert every repo
-- element with 'ngramsElementFromRepo', and set the scores when the
-- requested ordering needs them (see 'needsScores').
getNgramsTable' :: forall env err m.
                   ( HasNodeStory env err m
                   , HasNodeError err
                   , HasConnectionPool env
                   , HasConfig env
                   , HasMail env)
                => NodeId
                -> ListId
                -> TableNgrams.NgramsType
                -> Maybe OrderBy
                -> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType orderBy = do
  repoTable <- getNgramsTableMap listId ngramsType
  let
    toElements = Map.mapWithKey ngramsElementFromRepo
    withScores = setNgramsTableScores nId listId ngramsType (needsScores orderBy)
  repoTable & v_data %%~ withScores . toElements
634
635 ---------------------------------------
-- | Set @ne_occurrences@ on every element of the container.
--
-- When the flag is 'False' the container is returned untouched. When it is
-- 'True' the occurrences are fetched with 'getOccByNgramsOnlyFast', the
-- number of terms and the time spent are printed on stderr, and each element
-- receives the occurrences found for its term.
setNgramsTableScores :: forall env err m t.
                        ( Each t t NgramsElement NgramsElement
                        , HasNodeStory env err m
                        , HasNodeError err
                        , HasConnectionPool env
                        , HasConfig env
                        , HasMail env)
                     => NodeId
                     -> ListId
                     -> TableNgrams.NgramsType
                     -> Bool
                     -> t
                     -> m t
setNgramsTableScores nId listId ngramsType scoresWanted table
  | not scoresWanted = pure table
  | otherwise = do
      let terms = table ^.. each . ne_ngrams
      -- printDebug "ngrams_terms" terms
      started <- getTime
      occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
      --printDebug "occurrences" occurrences
      finished <- getTime
      liftBase $ hprint stderr
        ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
        (length terms) started finished
      let
        -- Sum over the optional entry for this term.
        occOf term = sumOf (at term . _Just) occurrences

      pure $ table & each %~ \ne -> ne & ne_occurrences .~ occOf (ne ^. ne_ngrams)
664
665
666
667
-- | Load the table of @listId@ for the ngrams type of @tabType@ and run
-- 'setNgramsTableScores' (with scoring forced on) over it. The scored table
-- is discarded; the function always returns @1@.
scoresRecomputeTableNgrams :: forall env err m.
  (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
  => NodeId -> TabType -> ListId -> m Int
scoresRecomputeTableNgrams nId tabType listId = do
  let
    ngramsType = ngramsTypeFromTabType tabType
    rescore    = setNgramsTableScores nId listId ngramsType True
               . Map.mapWithKey ngramsElementFromRepo
  repoTable <- getNgramsTableMap listId ngramsType
  _ <- repoTable & v_data %%~ rescore

  pure 1
679
680
681 -- APIs
682
683 -- TODO: find a better place for the code above, All APIs stay here
684
-- | Ordering of an ngrams table: by term or by score, ascending or
-- descending.
data OrderBy
  = TermAsc
  | TermDesc
  | ScoreAsc
  | ScoreDesc
  deriving (Generic, Enum, Bounded, Read, Show)

-- | Accepts exactly the constructor names.
instance FromHttpApiData OrderBy where
  parseUrlPiece piece = case piece of
    "TermAsc"   -> pure TermAsc
    "TermDesc"  -> pure TermDesc
    "ScoreAsc"  -> pure ScoreAsc
    "ScoreDesc" -> pure ScoreDesc
    _           -> Left "Unexpected value of OrderBy"

-- | Renders the constructor name.
instance ToHttpApiData OrderBy where
  toUrlPiece orderBy = pack (show orderBy)

instance ToParamSchema OrderBy
instance FromJSON OrderBy
instance ToJSON OrderBy
instance ToSchema OrderBy

-- | Picks any of the constructors.
instance Arbitrary OrderBy where
  arbitrary = elements [minBound ..]
706
-- | 'True' exactly when the requested ordering is by score (ascending or
-- descending); 'False' for term orderings and when no ordering is given.
needsScores :: Maybe OrderBy -> Bool
needsScores orderBy = case orderBy of
  Just ScoreAsc  -> True
  Just ScoreDesc -> True
  _              -> False
711
-- | GET: a page of the ngrams table, with optional offset, list type, term
-- size bounds, ordering and text search.
type TableNgramsApiGet =
     Summary " Table Ngrams API Get"
  :> QueryParamR "ngramsType"  TabType
  :> QueryParamR "list"        ListId
  :> QueryParamR "limit"       Limit
  :> QueryParam  "offset"      Offset
  :> QueryParam  "listType"    ListType
  :> QueryParam  "minTermSize" MinSize
  :> QueryParam  "maxTermSize" MaxSize
  :> QueryParam  "orderBy"     OrderBy
  :> QueryParam  "search"      Text
  :> Get '[JSON] (VersionedWithCount NgramsTable)

-- | PUT: send a versioned table patch, receive a versioned table patch.
type TableNgramsApiPut =
     Summary " Table Ngrams API Change"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> ReqBody '[JSON] (Versioned NgramsTablePatch)
  :> Put     '[JSON] (Versioned NgramsTablePatch)

-- | POST on @recompute@: recompute the scores of an ngrams table.
type RecomputeScoresNgramsApiGet =
     Summary " Recompute scores for ngrams table"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> "recompute" :> Post '[JSON] Int

-- | GET: the version of an ngrams table.
type TableNgramsApiGetVersion =
     Summary " Table Ngrams API Get Version"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> Get '[JSON] Version

-- | The complete table-ngrams API.
type TableNgramsApi =
       TableNgramsApiGet
  :<|> TableNgramsApiPut
  :<|> RecomputeScoresNgramsApiGet
  :<|> "version" :> TableNgramsApiGetVersion
  :<|> TableNgramsAsyncApi

-- | Asynchronous job endpoint under @async/charts/update@.
type TableNgramsAsyncApi =
     Summary "Table Ngrams Async API"
  :> "async"
  :> "charts"
  :> "update"
  :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
751
-- | 'getTableNgrams' for a corpus node. The optional text, when present,
-- keeps only the terms that contain it as an infix; when absent every term
-- is kept.
getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
               => NodeId
               -> TabType
               -> ListId
               -> Limit
               -> Maybe Offset
               -> Maybe ListType
               -> Maybe MinSize -> Maybe MaxSize
               -> Maybe OrderBy
               -> Maybe Text -- full text search
               -> m (VersionedWithCount NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
  getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy matches
  where
    matches (NgramsTerm nt) = case mt of
      Nothing     -> True
      Just needle -> needle `isInfixOf` nt
767
768
769
-- | Current version of the given list. The node id and tab type are
-- ignored.
getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
                      => NodeId
                      -> TabType
                      -> ListId
                      -> m Version
getTableNgramsVersion _ _ = currentVersion
776
777
778
779 -- TODO: limit?
780 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
781 -- This line above looks like a waste of computation to finally get only the version.
782 -- See the comment about listNgramsChangedSince.
783
784
-- | 'getTableNgrams' for a document node: only the terms returned by
-- 'selectNgramsByDoc' for this document (over the master user's lists plus
-- @listId@) are kept.
--
-- Text search is deactivated for now for ngrams by doc only: the search
-- argument is ignored.
getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
               => DocId -> TabType
               -> ListId -> Limit -> Maybe Offset
               -> Maybe ListType
               -> Maybe MinSize -> Maybe MaxSize
               -> Maybe OrderBy
               -> Maybe Text -- full text search
               -> m (VersionedWithCount NgramsTable)
getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
  ns <- selectNodesWithUsername NodeList userMaster
  let ngramsType = ngramsTypeFromTabType tabType
  ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
  let
    -- Built once and shared by every call of the predicate, instead of
    -- being rebuilt inside it for each term.
    docNgrams = S.fromList ngs
    searchQuery (NgramsTerm nt) = nt `S.member` docNgrams
  getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
800
801
802
-- | Server for 'TableNgramsApi' on a corpus node. Handlers are listed in the
-- order of the API type: get, put, recompute scores, version, async charts.
apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
apiNgramsTableCorpus cId =
       getTableNgramsCorpus       cId
  :<|> tableNgramsPut
  :<|> scoresRecomputeTableNgrams cId
  :<|> getTableNgramsVersion      cId
  :<|> apiNgramsAsync             cId
809
-- | Server for 'TableNgramsApi' on a document node. Handlers are listed in
-- the order of the API type: get, put, recompute scores, version, async
-- charts.
apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
apiNgramsTableDoc dId =
       getTableNgramsDoc          dId
  :<|> tableNgramsPut
  :<|> scoresRecomputeTableNgrams dId
  :<|> getTableNgramsVersion      dId
  :<|> apiNgramsAsync             dId
816
-- | Serve the asynchronous charts-update job. The node id is ignored. Every
-- status passed to the logger is first printed with 'printDebug', then
-- forwarded to the job's own logger.
apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
apiNgramsAsync _dId =
  serveJobsAPI TableNgramsJob $ \input jobLogger ->
    tableNgramsPostChartsAsync input $ \status -> do
      printDebug "tableNgramsPostChartsAsync" status
      liftBase $ jobLogger status
825
-- | Has the given list of ngrams changed since the given version?
--
-- The returned value is a versioned boolean, meaning that one always
-- retrieves the latest version along with the answer:
--
--   * if the given version is negative, one simply receives the latest
--     version and 'True';
--   * otherwise the answer is 'True' exactly when the patch pulled for this
--     ngrams type since that version is non-empty.
--
-- Using this function is more precise than simply comparing the latest
-- version number with the local version number. Indeed there might be no
-- change to this particular list and still the version number has changed
-- because of other lists.
--
-- Here the added value is to make a compromise between precision,
-- computation, and bandwidth:
--
--   * currentVersion: good computation, good bandwidth, bad precision.
--   * listNgramsChangedSince: good precision, good bandwidth, bad computation.
--   * tableNgramsPull: good precision, good bandwidth (if you use the
--     received data!), bad computation.
listNgramsChangedSince :: HasNodeStory env err m
                       => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
  | version < 0 =
      Versioned <$> currentVersion listId <*> pure True
  | otherwise   =
      -- A non-empty pulled patch means something changed. The previous
      -- comparison (== mempty) answered True when nothing had changed.
      tableNgramsPull listId ngramsType version & mapped . v_data %~ (/= mempty)