]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[DEBUG] message
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , Version
73 , Versioned(..)
74 , VersionedWithCount(..)
75 , currentVersion
76 , listNgramsChangedSince
77 , MinSize, MaxSize, OrderBy, NgramsTable
78 , UpdateTableNgramsCharts
79 )
80 where
81
82 import Control.Concurrent
83 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
84 import Control.Monad.Reader
85 import Data.Aeson hiding ((.=))
86 import Data.Either (Either(..))
87 import Data.Foldable
88 import Data.Map.Strict (Map)
89 import Data.Maybe (fromMaybe)
90 import Data.Monoid
91 import Data.Ord (Down(..))
92 import Data.Patch.Class (Action(act), Transformable(..), ours)
93 import Data.Swagger hiding (version, patch)
94 import Data.Text (Text, isInfixOf, unpack, pack)
95 import Data.Text.Lazy.IO as DTL
96 import Formatting (hprint, int, (%))
97 import GHC.Generics (Generic)
98 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
99 import Gargantext.API.Admin.Types (HasSettings)
100 import Gargantext.API.Job
101 import Gargantext.API.Ngrams.Types
102 import Gargantext.API.Prelude
103 import Gargantext.Core.NodeStory
104 import Gargantext.Core.Mail.Types (HasMail)
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133
134 {-
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
137
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
140
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 ]
148
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
151 -}
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
155
156 {-
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
166 ]
167
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> NgramsType
170 -> NgramsTablePatch
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
176 ]
177 -}
178
179 ------------------------------------------------------------------------
180
181 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 => m ()
183 saveNodeStory = do
184 saver <- view hasNodeStorySaver
185 liftBase $ do
186 Gargantext.Prelude.putStrLn "---- Running node story saver ----"
187 saver
188 Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
189
190
191 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
192 => m ()
193 saveNodeStoryImmediate = do
194 saver <- view hasNodeStoryImmediateSaver
195 liftBase $ do
196 Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
197 saver
198 Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
199
200
201 listTypeConflictResolution :: ListType -> ListType -> ListType
202 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
203
204
205 ngramsStatePatchConflictResolution
206 :: TableNgrams.NgramsType
207 -> NgramsTerm
208 -> ConflictResolutionNgramsPatch
209 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
210 = (ours, (const ours, ours), (False, False))
211 -- (False, False) mean here that Mod has always priority.
212 -- (True, False) <- would mean priority to the left (same as ours).
213 -- undefined {- TODO think this through -}, listTypeConflictResolution)
214
215
216
217
218 -- Current state:
219 -- Insertions are not considered as patches,
220 -- they do not extend history,
221 -- they do not bump version.
222 insertNewOnly :: a -> Maybe b -> a
223 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
224 -- TODO error handling
225
226 {- unused
227 -- TODO refactor with putListNgrams
228 copyListNgrams :: RepoCmdM env err m
229 => NodeId -> NodeId -> NgramsType
230 -> m ()
231 copyListNgrams srcListId dstListId ngramsType = do
232 var <- view repoVar
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType %~ (Just . f . something))
235 saveNodeStory
236 where
237 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
238 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
239
240 -- TODO refactor with putListNgrams
241 -- The list must be non-empty!
242 -- The added ngrams must be non-existent!
243 addListNgrams :: RepoCmdM env err m
244 => NodeId -> NgramsType
245 -> [NgramsElement] -> m ()
246 addListNgrams listId ngramsType nes = do
247 var <- view repoVar
248 liftBase $ modifyMVar_ var $
249 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
250 saveNodeStory
251 where
252 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
253 -}
254
255 -- | TODO: incr the Version number
256 -- && should use patch
257 -- UNSAFE
258
259 setListNgrams :: HasNodeStory env err m
260 => NodeId
261 -> TableNgrams.NgramsType
262 -> Map NgramsTerm NgramsRepoElement
263 -> m ()
264 setListNgrams listId ngramsType ns = do
265 -- printDebug "[setListNgrams]" (listId, ngramsType)
266 getter <- view hasNodeStory
267 var <- liftBase $ (getter ^. nse_getter) [listId]
268 liftBase $ modifyMVar_ var $
269 pure . ( unNodeStory
270 . at listId . _Just
271 . a_state
272 . at ngramsType
273 .~ Just ns
274 )
275 saveNodeStory
276
277
278 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
279 newNgramsFromNgramsStatePatch p =
280 [ text2ngrams (unNgramsTerm n)
281 | (n,np) <- p ^.. _PatchMap
282 -- . each . _PatchMap
283 . each . _NgramsTablePatch
284 . _PatchMap . ifolded . withIndex
285 , _ <- np ^.. patch_new . _Just
286 ]
287
288
289
290
291 commitStatePatch :: (HasNodeStory env err m, HasMail env)
292 => ListId
293 -> Versioned NgramsStatePatch'
294 -> m (Versioned NgramsStatePatch')
295 commitStatePatch listId (Versioned _p_version p) = do
296 -- printDebug "[commitStatePatch]" listId
297 var <- getNodeStoryVar [listId]
298 vq' <- liftBase $ modifyMVar var $ \ns -> do
299 let
300 a = ns ^. unNodeStory . at listId . _Just
301 -- apply patches from version p_version to a ^. a_version
302 -- TODO Check this
303 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
304 q = mconcat $ a ^. a_history
305
306 printDebug "transformWith" (p,q)
307
308 let
309 (p', q') = transformWith ngramsStatePatchConflictResolution p q
310 a' = a & a_version +~ 1
311 & a_state %~ act p'
312 & a_history %~ (p' :)
313
314 {-
315 -- Ideally we would like to check these properties. However:
316 -- * They should be checked only to debug the code. The client data
317 -- should be able to trigger these.
318 -- * What kind of error should they throw (we are in IO here)?
319 -- * Should we keep modifyMVar?
320 -- * Should we throw the validation in an Exception, catch it around
321 -- modifyMVar and throw it back as an Error?
322 assertValid $ transformable p q
323 assertValid $ applicable p' (r ^. r_state)
324 -}
325 printDebug "[commitStatePatch] a version" (a ^. a_version)
326 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
327 pure ( ns & unNodeStory . at listId .~ (Just a')
328 , Versioned (a' ^. a_version) q'
329 )
330 saveNodeStory
331 -- Save new ngrams
332 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
333
334 pure vq'
335
336
337
338 -- This is a special case of tableNgramsPut where the input patch is empty.
339 tableNgramsPull :: HasNodeStory env err m
340 => ListId
341 -> TableNgrams.NgramsType
342 -> Version
343 -> m (Versioned NgramsTablePatch)
344 tableNgramsPull listId ngramsType p_version = do
345 printDebug "[tableNgramsPull]" (listId, ngramsType)
346 var <- getNodeStoryVar [listId]
347 r <- liftBase $ readMVar var
348
349 let
350 a = r ^. unNodeStory . at listId . _Just
351 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
352 q_table = q ^. _PatchMap . at ngramsType . _Just
353
354 pure (Versioned (a ^. a_version) q_table)
355
356
357
358
359 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
360 -- Apply the given patch to the DB and returns the patch to be applied on the
361 -- client.
362 -- TODO-ACCESS check
363 tableNgramsPut :: ( HasNodeStory env err m
364 , HasInvalidError err
365 , HasSettings env
366 , HasMail env
367 )
368 => TabType
369 -> ListId
370 -> Versioned NgramsTablePatch
371 -> m (Versioned NgramsTablePatch)
372 tableNgramsPut tabType listId (Versioned p_version p_table)
373 | p_table == mempty = do
374 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
375 let ngramsType = ngramsTypeFromTabType tabType
376 tableNgramsPull listId ngramsType p_version
377
378 | otherwise = do
379 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
380 let ngramsType = ngramsTypeFromTabType tabType
381 (p, p_validity) = PM.singleton ngramsType p_table
382
383 assertValid p_validity
384
385 ret <- commitStatePatch listId (Versioned p_version p)
386 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
387
388 pure ret
389
390
391
392 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
393 , FlowCmdM env err m
394 , HasNodeError err
395 , HasSettings env
396 )
397 => UpdateTableNgramsCharts
398 -> (JobLog -> m ())
399 -> m JobLog
400 tableNgramsPostChartsAsync utn logStatus = do
401 let tabType = utn ^. utn_tab_type
402 let listId = utn ^. utn_list_id
403
404 node <- getNode listId
405 let nId = node ^. node_id
406 _uId = node ^. node_user_id
407 mCId = node ^. node_parent_id
408
409 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
410 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
411
412 case mCId of
413 Nothing -> do
414 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
415 pure $ jobLogFail $ jobLogInit 1
416 Just cId -> do
417 case tabType of
418 Authors -> do
419 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
420 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
421 logRef
422 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
423 logRefSuccess
424
425 getRef
426 Institutes -> do
427 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
428 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
429 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
430 logRef
431 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
432 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
433 logRefSuccess
434 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
435 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
436 logRefSuccess
437 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
438 logRefSuccess
439
440 getRef
441 Sources -> do
442 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
443 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
444 logRef
445 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
446 logRefSuccess
447
448 getRef
449 Terms -> do
450 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
451 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
452 logRef
453 {-
454 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
455 logRefSuccess
456 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
457 logRefSuccess
458 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
459 logRefSuccess
460 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
461 logRefSuccess
462 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
463 logRefSuccess
464 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
465 -}
466 logRefSuccess
467
468 getRef
469 _ -> do
470 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
471 pure $ jobLogFail $ jobLogInit 1
472
473 {-
474 { _ne_list :: ListType
475 If we merge the parents/children we can potentially create cycles!
476 , _ne_parent :: Maybe NgramsTerm
477 , _ne_children :: MSet NgramsTerm
478 }
479 -}
480
481 getNgramsTableMap :: HasNodeStory env err m
482 => NodeId
483 -> TableNgrams.NgramsType
484 -> m (Versioned NgramsTableMap)
485 getNgramsTableMap nodeId ngramsType = do
486 v <- getNodeStoryVar [nodeId]
487 repo <- liftBase $ readMVar v
488 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
489 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
490
491
492 dumpJsonTableMap :: HasNodeStory env err m
493 => Text
494 -> NodeId
495 -> TableNgrams.NgramsType
496 -> m ()
497 dumpJsonTableMap fpath nodeId ngramsType = do
498 m <- getNgramsTableMap nodeId ngramsType
499 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
500 pure ()
501
502
503 type MinSize = Int
504 type MaxSize = Int
505
506 -- | TODO Errors management
507 -- TODO: polymorphic for Annuaire or Corpus or ...
508 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
509 -- TODO: should take only one ListId
510
511
512 getTableNgrams :: forall env err m.
513 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
514 => NodeType -> NodeId -> TabType
515 -> ListId -> Limit -> Maybe Offset
516 -> Maybe ListType
517 -> Maybe MinSize -> Maybe MaxSize
518 -> Maybe OrderBy
519 -> (NgramsTerm -> Bool)
520 -> m (VersionedWithCount NgramsTable)
521 getTableNgrams _nType nId tabType listId limit_ offset
522 listType minSize maxSize orderBy searchQuery = do
523
524 t0 <- getTime
525 -- lIds <- selectNodesWithUsername NodeList userMaster
526 let
527 ngramsType = ngramsTypeFromTabType tabType
528 offset' = maybe 0 identity offset
529 listType' = maybe (const True) (==) listType
530 minSize' = maybe (const True) (<=) minSize
531 maxSize' = maybe (const True) (>=) maxSize
532
533 selected_node n = minSize' s
534 && maxSize' s
535 && searchQuery (n ^. ne_ngrams)
536 && listType' (n ^. ne_list)
537 where
538 s = n ^. ne_size
539
540 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
541
542 ---------------------------------------
543 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
544 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
545 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
546 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
547 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
548
549 ---------------------------------------
550 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
551 filteredNodes tableMap = rootOf <$> list & filter selected_node
552 where
553 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
554 (tableMap ^. at r)
555 )
556 (ne ^. ne_root)
557 list = tableMap ^.. each
558
559 ---------------------------------------
560 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
561 selectAndPaginate tableMap = roots <> inners
562 where
563 list = tableMap ^.. each
564 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
565 (tableMap ^. at r)
566 )
567 (ne ^. ne_root)
568 selected_nodes = list & take limit_
569 . drop offset'
570 . filter selected_node
571 . sortOnOrder orderBy
572 roots = rootOf <$> selected_nodes
573 rootsSet = Set.fromList (_ne_ngrams <$> roots)
574 inners = list & filter (selected_inner rootsSet)
575
576 ---------------------------------------
577 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
578 setScores False table = pure table
579 setScores True table = do
580 let ngrams_terms = table ^.. each . ne_ngrams
581 -- printDebug "ngrams_terms" ngrams_terms
582 t1 <- getTime
583 occurrences <- getOccByNgramsOnlyFast' nId
584 listId
585 ngramsType
586 ngrams_terms
587 --printDebug "occurrences" occurrences
588 t2 <- getTime
589 liftBase $ hprint stderr
590 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
591 (length ngrams_terms) t1 t2
592 let
593 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
594
595 pure $ table & each %~ setOcc
596 ---------------------------------------
597
598 -- lists <- catMaybes <$> listsWith userMaster
599 -- trace (show lists) $
600 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
601
602
603 let scoresNeeded = needsScores orderBy
604 tableMap1 <- getNgramsTableMap listId ngramsType
605 t1 <- getTime
606
607 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
608 . Map.mapWithKey ngramsElementFromRepo
609
610 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
611 . filteredNodes
612
613 let fltrCount = length $ fltr ^. v_data . _NgramsTable
614
615 t2 <- getTime
616 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
617 . setScores (not scoresNeeded)
618 . selectAndPaginate
619 t3 <- getTime
620 liftBase $ hprint stderr
621 ("getTableNgrams total=" % hasTime
622 % " map1=" % hasTime
623 % " map2=" % hasTime
624 % " map3=" % hasTime
625 % " sql=" % (if scoresNeeded then "map2" else "map3")
626 % "\n"
627 ) t0 t3 t0 t1 t1 t2 t2 t3
628 pure $ toVersionedWithCount fltrCount tableMap3
629
630
631
632 scoresRecomputeTableNgrams :: forall env err m.
633 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
634 => NodeId -> TabType -> ListId -> m Int
635 scoresRecomputeTableNgrams nId tabType listId = do
636 tableMap <- getNgramsTableMap listId ngramsType
637 _ <- tableMap & v_data %%~ setScores
638 . Map.mapWithKey ngramsElementFromRepo
639
640 pure $ 1
641 where
642 ngramsType = ngramsTypeFromTabType tabType
643
644 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
645 setScores table = do
646 let ngrams_terms = table ^.. each . ne_ngrams
647 occurrences <- getOccByNgramsOnlyFast' nId
648 listId
649 ngramsType
650 ngrams_terms
651 let
652 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
653
654 pure $ table & each %~ setOcc
655
656
657
658
659 -- APIs
660
661 -- TODO: find a better place for the code above, All APIs stay here
662
663 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
664 deriving (Generic, Enum, Bounded, Read, Show)
665
666 instance FromHttpApiData OrderBy
667 where
668 parseUrlPiece "TermAsc" = pure TermAsc
669 parseUrlPiece "TermDesc" = pure TermDesc
670 parseUrlPiece "ScoreAsc" = pure ScoreAsc
671 parseUrlPiece "ScoreDesc" = pure ScoreDesc
672 parseUrlPiece _ = Left "Unexpected value of OrderBy"
673
674 instance ToHttpApiData OrderBy where
675 toUrlPiece = pack . show
676
677 instance ToParamSchema OrderBy
678 instance FromJSON OrderBy
679 instance ToJSON OrderBy
680 instance ToSchema OrderBy
681 instance Arbitrary OrderBy
682 where
683 arbitrary = elements [minBound..maxBound]
684
685 needsScores :: Maybe OrderBy -> Bool
686 needsScores (Just ScoreAsc) = True
687 needsScores (Just ScoreDesc) = True
688 needsScores _ = False
689
690 type TableNgramsApiGet = Summary " Table Ngrams API Get"
691 :> QueryParamR "ngramsType" TabType
692 :> QueryParamR "list" ListId
693 :> QueryParamR "limit" Limit
694 :> QueryParam "offset" Offset
695 :> QueryParam "listType" ListType
696 :> QueryParam "minTermSize" MinSize
697 :> QueryParam "maxTermSize" MaxSize
698 :> QueryParam "orderBy" OrderBy
699 :> QueryParam "search" Text
700 :> Get '[JSON] (VersionedWithCount NgramsTable)
701
702 type TableNgramsApiPut = Summary " Table Ngrams API Change"
703 :> QueryParamR "ngramsType" TabType
704 :> QueryParamR "list" ListId
705 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
706 :> Put '[JSON] (Versioned NgramsTablePatch)
707
708 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
709 :> QueryParamR "ngramsType" TabType
710 :> QueryParamR "list" ListId
711 :> "recompute" :> Post '[JSON] Int
712
713 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
714 :> QueryParamR "ngramsType" TabType
715 :> QueryParamR "list" ListId
716 :> Get '[JSON] Version
717
718 type TableNgramsApi = TableNgramsApiGet
719 :<|> TableNgramsApiPut
720 :<|> RecomputeScoresNgramsApiGet
721 :<|> "version" :> TableNgramsApiGetVersion
722 :<|> TableNgramsAsyncApi
723
724 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
725 :> "async"
726 :> "charts"
727 :> "update"
728 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
729
730 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
731 => NodeId
732 -> TabType
733 -> ListId
734 -> Limit
735 -> Maybe Offset
736 -> Maybe ListType
737 -> Maybe MinSize -> Maybe MaxSize
738 -> Maybe OrderBy
739 -> Maybe Text -- full text search
740 -> m (VersionedWithCount NgramsTable)
741 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
742 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
743 where
744 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
745
746
747
748 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
749 => NodeId
750 -> TabType
751 -> ListId
752 -> m Version
753 getTableNgramsVersion _nId _tabType listId = currentVersion listId
754
755
756
757 -- TODO: limit?
758 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
759 -- This line above looks like a waste of computation to finally get only the version.
760 -- See the comment about listNgramsChangedSince.
761
762
763 -- | Text search is deactivated for now for ngrams by doc only
764 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
765 => DocId -> TabType
766 -> ListId -> Limit -> Maybe Offset
767 -> Maybe ListType
768 -> Maybe MinSize -> Maybe MaxSize
769 -> Maybe OrderBy
770 -> Maybe Text -- full text search
771 -> m (VersionedWithCount NgramsTable)
772 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
773 ns <- selectNodesWithUsername NodeList userMaster
774 let ngramsType = ngramsTypeFromTabType tabType
775 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
776 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
777 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
778
779
780
781 apiNgramsTableCorpus :: ( GargServerC env err m
782 )
783 => NodeId -> ServerT TableNgramsApi m
784 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
785 :<|> tableNgramsPut
786 :<|> scoresRecomputeTableNgrams cId
787 :<|> getTableNgramsVersion cId
788 :<|> apiNgramsAsync cId
789
790 apiNgramsTableDoc :: ( GargServerC env err m
791 )
792 => DocId -> ServerT TableNgramsApi m
793 apiNgramsTableDoc dId = getTableNgramsDoc dId
794 :<|> tableNgramsPut
795 :<|> scoresRecomputeTableNgrams dId
796 :<|> getTableNgramsVersion dId
797 :<|> apiNgramsAsync dId
798
799 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
800 apiNgramsAsync _dId =
801 serveJobsAPI $
802 JobFunction $ \i log ->
803 let
804 log' x = do
805 printDebug "tableNgramsPostChartsAsync" x
806 liftBase $ log x
807 in tableNgramsPostChartsAsync i log'
808
809 -- Did the given list of ngrams changed since the given version?
810 -- The returned value is versioned boolean value, meaning that one always retrieve the
811 -- latest version.
812 -- If the given version is negative then one simply receive the latest version and True.
813 -- Using this function is more precise than simply comparing the latest version number
814 -- with the local version number. Indeed there might be no change to this particular list
815 -- and still the version number has changed because of other lists.
816 --
817 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
818 -- * currentVersion: good computation, good bandwidth, bad precision.
819 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
820 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
821 listNgramsChangedSince :: HasNodeStory env err m
822 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
823 listNgramsChangedSince listId ngramsType version
824 | version < 0 =
825 Versioned <$> currentVersion listId <*> pure True
826 | otherwise =
827 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)