]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[TOOLING] sql queries to remove unused documents and ngrams from database. Usage...
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsTablePatch
37 , NgramsTableMap
38
39 , NgramsTerm(..)
40
41 , NgramsElement(..)
42 , mkNgramsElement
43
44 , RootParent(..)
45
46 , MSet
47 , mSetFromList
48 , mSetToList
49
50 , Repo(..)
51 , r_version
52 , r_state
53 , r_history
54 , NgramsRepoElement(..)
55 , saveNodeStory
56 , initRepo
57
58 , TabType(..)
59
60 , QueryParamR
61 , TODO
62
63 -- Internals
64 , getNgramsTableMap
65 , dumpJsonTableMap
66 , tableNgramsPull
67 , tableNgramsPut
68
69 , Version
70 , Versioned(..)
71 , VersionedWithCount(..)
72 , currentVersion
73 , listNgramsChangedSince
74 )
75 where
76
77 import Control.Concurrent
78 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
79 import Control.Monad.Reader
80 import Data.Aeson hiding ((.=))
81 import Data.Either (Either(..))
82 import Data.Foldable
83 import Data.Map.Strict (Map)
84 import Data.Maybe (fromMaybe)
85 import Data.Monoid
86 import Data.Ord (Down(..))
87 import Data.Patch.Class (Action(act), Transformable(..), ours)
88 import Data.Swagger hiding (version, patch)
89 import Data.Text (Text, isInfixOf, unpack)
90 import Data.Text.Lazy.IO as DTL
91 import Formatting (hprint, int, (%))
92 import GHC.Generics (Generic)
93 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
94 import Gargantext.API.Admin.Types (HasSettings)
95 import Gargantext.API.Job
96 import Gargantext.API.Ngrams.Types
97 import Gargantext.API.Prelude
98 import Gargantext.Core.NodeStory
99 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
100 import Gargantext.API.Ngrams.Tools
101 import Gargantext.Database.Action.Flow.Types
102 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
103 import Gargantext.Database.Admin.Config (userMaster)
104 import Gargantext.Database.Admin.Types.Node (NodeType(..))
105 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
106 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
107 import Gargantext.Database.Query.Table.Node (getNode)
108 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
109 import Gargantext.Database.Query.Table.Node.Select
110 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
111 import Gargantext.Prelude hiding (log)
112 import Gargantext.Prelude.Clock (hasTime, getTime)
113 import Prelude (error)
114 import Servant hiding (Patch)
115 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
116 import System.IO (stderr)
117 import Test.QuickCheck (elements)
118 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
119 import qualified Data.Aeson.Text as DAT
120 import qualified Data.List as List
121 import qualified Data.Map.Strict as Map
122 import qualified Data.Map.Strict.Patch as PM
123 import qualified Data.Set as S
124 import qualified Data.Set as Set
125 import qualified Gargantext.API.Metrics as Metrics
126 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
127
128 {-
129 -- TODO sequences of modifications (Patchs)
130 type NgramsIdPatch = Patch NgramsId NgramsPatch
131
132 ngramsPatch :: Int -> NgramsPatch
133 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
134
135 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
136 toEdit n p = Edit n p
137 ngramsIdPatch :: Patch NgramsId NgramsPatch
138 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
139 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
140 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
141 ]
142
143 -- applyPatchBack :: Patch -> IO Patch
144 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
145 -}
146 ------------------------------------------------------------------------
147 ------------------------------------------------------------------------
148 ------------------------------------------------------------------------
149
150 {-
151 -- TODO: Replace.old is ignored which means that if the current list
152 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
153 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
154 -- However this should not happen in non conflicting situations.
155 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
156 mkListsUpdate nt patches =
157 [ (ngramsTypeId nt, ng, listTypeId lt)
158 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
159 , lt <- patch ^.. patch_list . new
160 ]
161
162 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
163 -> NgramsType
164 -> NgramsTablePatch
165 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
166 mkChildrenGroups addOrRem nt patches =
167 [ (ngramsTypeId nt, parent, child)
168 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , child <- patch ^.. patch_children . to addOrRem . folded
170 ]
171 -}
172
173 ------------------------------------------------------------------------
174
175 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
176 => m ()
177 saveNodeStory = liftBase =<< view hasNodeStorySaver
178
179
180 listTypeConflictResolution :: ListType -> ListType -> ListType
181 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
182
183
184 ngramsStatePatchConflictResolution
185 :: TableNgrams.NgramsType
186 -> NgramsTerm
187 -> ConflictResolutionNgramsPatch
188 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
189 = (ours, (const ours, ours), (False, False))
190 -- (False, False) mean here that Mod has always priority.
191 -- (True, False) <- would mean priority to the left (same as ours).
192 -- undefined {- TODO think this through -}, listTypeConflictResolution)
193
194
195
196
197 -- Current state:
198 -- Insertions are not considered as patches,
199 -- they do not extend history,
200 -- they do not bump version.
201 insertNewOnly :: a -> Maybe b -> a
202 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
203 -- TODO error handling
204
205 {- unused
206 -- TODO refactor with putListNgrams
207 copyListNgrams :: RepoCmdM env err m
208 => NodeId -> NodeId -> NgramsType
209 -> m ()
210 copyListNgrams srcListId dstListId ngramsType = do
211 var <- view repoVar
212 liftBase $ modifyMVar_ var $
213 pure . (r_state . at ngramsType %~ (Just . f . something))
214 saveNodeStory
215 where
216 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
217 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
218
219 -- TODO refactor with putListNgrams
220 -- The list must be non-empty!
221 -- The added ngrams must be non-existent!
222 addListNgrams :: RepoCmdM env err m
223 => NodeId -> NgramsType
224 -> [NgramsElement] -> m ()
225 addListNgrams listId ngramsType nes = do
226 var <- view repoVar
227 liftBase $ modifyMVar_ var $
228 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
229 saveNodeStory
230 where
231 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
232 -}
233
234 -- | TODO: incr the Version number
235 -- && should use patch
236 -- UNSAFE
237
238 setListNgrams :: HasNodeStory env err m
239 => NodeId
240 -> TableNgrams.NgramsType
241 -> Map NgramsTerm NgramsRepoElement
242 -> m ()
243 setListNgrams listId ngramsType ns = do
244 printDebug "[setListNgrams]" (listId, ngramsType)
245 getter <- view hasNodeStory
246 var <- liftBase $ (getter ^. nse_getter) [listId]
247 liftBase $ modifyMVar_ var $
248 pure . ( unNodeStory
249 . at listId . _Just
250 . a_state
251 . at ngramsType
252 .~ Just ns
253 )
254 saveNodeStory
255
256
257 currentVersion :: HasNodeStory env err m
258 => ListId -> m Version
259 currentVersion listId = do
260 nls <- getRepo' [listId]
261 pure $ nls ^. unNodeStory . at listId . _Just . a_version
262
263
264 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
265 newNgramsFromNgramsStatePatch p =
266 [ text2ngrams (unNgramsTerm n)
267 | (n,np) <- p ^.. _PatchMap
268 -- . each . _PatchMap
269 . each . _NgramsTablePatch
270 . _PatchMap . ifolded . withIndex
271 , _ <- np ^.. patch_new . _Just
272 ]
273
274
275
276
277 commitStatePatch :: HasNodeStory env err m
278 => ListId
279 -> Versioned NgramsStatePatch'
280 -> m (Versioned NgramsStatePatch')
281 commitStatePatch listId (Versioned p_version p) = do
282 printDebug "[commitStatePatch]" listId
283 var <- getNodeStoryVar [listId]
284 vq' <- liftBase $ modifyMVar var $ \ns -> do
285 let
286 a = ns ^. unNodeStory . at listId . _Just
287 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
288 (p', q') = transformWith ngramsStatePatchConflictResolution p q
289 a' = a & a_version +~ 1
290 & a_state %~ act p'
291 & a_history %~ (p' :)
292
293 {-
294 -- Ideally we would like to check these properties. However:
295 -- * They should be checked only to debug the code. The client data
296 -- should be able to trigger these.
297 -- * What kind of error should they throw (we are in IO here)?
298 -- * Should we keep modifyMVar?
299 -- * Should we throw the validation in an Exception, catch it around
300 -- modifyMVar and throw it back as an Error?
301 assertValid $ transformable p q
302 assertValid $ applicable p' (r ^. r_state)
303 -}
304 printDebug "[commitStatePatch] a version" (a ^. a_version)
305 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
306 pure ( ns & unNodeStory . at listId .~ (Just a')
307 , Versioned (a' ^. a_version) q'
308 )
309 saveNodeStory
310 -- Save new ngrams
311 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
312
313 pure vq'
314
315
316
317 -- This is a special case of tableNgramsPut where the input patch is empty.
318 tableNgramsPull :: HasNodeStory env err m
319 => ListId
320 -> TableNgrams.NgramsType
321 -> Version
322 -> m (Versioned NgramsTablePatch)
323 tableNgramsPull listId ngramsType p_version = do
324 printDebug "[tableNgramsPull]" (listId, ngramsType)
325 var <- getNodeStoryVar [listId]
326 r <- liftBase $ readMVar var
327
328 let
329 a = r ^. unNodeStory . at listId . _Just
330 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
331 q_table = q ^. _PatchMap . at ngramsType . _Just
332
333 pure (Versioned (a ^. a_version) q_table)
334
335
336
337
338 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
339 -- Apply the given patch to the DB and returns the patch to be applied on the
340 -- client.
341 -- TODO-ACCESS check
342 tableNgramsPut :: ( HasNodeStory env err m
343 , HasInvalidError err
344 , HasSettings env
345 )
346 => TabType
347 -> ListId
348 -> Versioned NgramsTablePatch
349 -> m (Versioned NgramsTablePatch)
350 tableNgramsPut tabType listId (Versioned p_version p_table)
351 | p_table == mempty = do
352 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
353 let ngramsType = ngramsTypeFromTabType tabType
354 tableNgramsPull listId ngramsType p_version
355
356 | otherwise = do
357 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
358 let ngramsType = ngramsTypeFromTabType tabType
359 (p, p_validity) = PM.singleton ngramsType p_table
360
361 assertValid p_validity
362
363 ret <- commitStatePatch listId (Versioned p_version p)
364 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
365
366 pure ret
367
368
369
370 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
371 , FlowCmdM env err m
372 , HasNodeError err
373 , HasSettings env
374 )
375 => UpdateTableNgramsCharts
376 -> (JobLog -> m ())
377 -> m JobLog
378 tableNgramsPostChartsAsync utn logStatus = do
379 let tabType = utn ^. utn_tab_type
380 let listId = utn ^. utn_list_id
381
382 node <- getNode listId
383 let nId = node ^. node_id
384 _uId = node ^. node_user_id
385 mCId = node ^. node_parent_id
386
387 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
388 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
389
390 case mCId of
391 Nothing -> do
392 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
393 pure $ jobLogFail $ jobLogInit 1
394 Just cId -> do
395 case tabType of
396 Authors -> do
397 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
398 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
399 logRef
400 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
401 logRefSuccess
402
403 getRef
404 Institutes -> do
405 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
406 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
407 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
408 logRef
409 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
410 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
411 logRefSuccess
412 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
413 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
414 logRefSuccess
415 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
416 logRefSuccess
417
418 getRef
419 Sources -> do
420 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
421 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
422 logRef
423 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
424 logRefSuccess
425
426 getRef
427 Terms -> do
428 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
429 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
430 logRef
431 {-
432 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
433 logRefSuccess
434 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
435 logRefSuccess
436 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
437 logRefSuccess
438 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
439 logRefSuccess
440 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
441 logRefSuccess
442 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
443 -}
444 logRefSuccess
445
446 getRef
447 _ -> do
448 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
449 pure $ jobLogFail $ jobLogInit 1
450
451 {-
452 { _ne_list :: ListType
453 If we merge the parents/children we can potentially create cycles!
454 , _ne_parent :: Maybe NgramsTerm
455 , _ne_children :: MSet NgramsTerm
456 }
457 -}
458
459 getNgramsTableMap :: HasNodeStory env err m
460 => NodeId
461 -> TableNgrams.NgramsType
462 -> m (Versioned NgramsTableMap)
463 getNgramsTableMap nodeId ngramsType = do
464 v <- getNodeStoryVar [nodeId]
465 repo <- liftBase $ readMVar v
466 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
467 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
468
469
470 dumpJsonTableMap :: HasNodeStory env err m
471 => Text
472 -> NodeId
473 -> TableNgrams.NgramsType
474 -> m ()
475 dumpJsonTableMap fpath nodeId ngramsType = do
476 m <- getNgramsTableMap nodeId ngramsType
477 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
478 pure ()
479
480
481 type MinSize = Int
482 type MaxSize = Int
483
484 -- | TODO Errors management
485 -- TODO: polymorphic for Annuaire or Corpus or ...
486 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
487 -- TODO: should take only one ListId
488
489
490 getTableNgrams :: forall env err m.
491 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
492 => NodeType -> NodeId -> TabType
493 -> ListId -> Limit -> Maybe Offset
494 -> Maybe ListType
495 -> Maybe MinSize -> Maybe MaxSize
496 -> Maybe OrderBy
497 -> (NgramsTerm -> Bool)
498 -> m (VersionedWithCount NgramsTable)
499 getTableNgrams _nType nId tabType listId limit_ offset
500 listType minSize maxSize orderBy searchQuery = do
501
502 t0 <- getTime
503 -- lIds <- selectNodesWithUsername NodeList userMaster
504 let
505 ngramsType = ngramsTypeFromTabType tabType
506 offset' = maybe 0 identity offset
507 listType' = maybe (const True) (==) listType
508 minSize' = maybe (const True) (<=) minSize
509 maxSize' = maybe (const True) (>=) maxSize
510
511 selected_node n = minSize' s
512 && maxSize' s
513 && searchQuery (n ^. ne_ngrams)
514 && listType' (n ^. ne_list)
515 where
516 s = n ^. ne_size
517
518 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
519
520 ---------------------------------------
521 sortOnOrder Nothing = identity
522 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
523 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
524 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
525 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
526
527 ---------------------------------------
528
529 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
530 filteredNodes tableMap = rootOf <$> list & filter selected_node
531 where
532 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
533 (tableMap ^. at r)
534 )
535 (ne ^. ne_root)
536 list = tableMap ^.. each
537
538 ---------------------------------------
539 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
540 selectAndPaginate tableMap = roots <> inners
541 where
542 list = tableMap ^.. each
543 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
544 (tableMap ^. at r)
545 )
546 (ne ^. ne_root)
547 selected_nodes = list & take limit_
548 . drop offset'
549 . filter selected_node
550 . sortOnOrder orderBy
551 roots = rootOf <$> selected_nodes
552 rootsSet = Set.fromList (_ne_ngrams <$> roots)
553 inners = list & filter (selected_inner rootsSet)
554
555 ---------------------------------------
556 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
557 setScores False table = pure table
558 setScores True table = do
559 let ngrams_terms = table ^.. each . ne_ngrams
560 t1 <- getTime
561 occurrences <- getOccByNgramsOnlyFast' nId
562 listId
563 ngramsType
564 ngrams_terms
565 t2 <- getTime
566 liftBase $ hprint stderr
567 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
568 (length ngrams_terms) t1 t2
569 {-
570 occurrences <- getOccByNgramsOnlySlow nType nId
571 (lIds <> [listId])
572 ngramsType
573 ngrams_terms
574 -}
575 let
576 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
577
578 pure $ table & each %~ setOcc
579 ---------------------------------------
580
581 -- lists <- catMaybes <$> listsWith userMaster
582 -- trace (show lists) $
583 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
584
585
586 let scoresNeeded = needsScores orderBy
587 tableMap1 <- getNgramsTableMap listId ngramsType
588 t1 <- getTime
589 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
590 . Map.mapWithKey ngramsElementFromRepo
591
592 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
593 . filteredNodes
594 let fltrCount = length $ fltr ^. v_data . _NgramsTable
595
596 t2 <- getTime
597 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
598 . setScores (not scoresNeeded)
599 . selectAndPaginate
600 t3 <- getTime
601 liftBase $ hprint stderr
602 ("getTableNgrams total=" % hasTime
603 % " map1=" % hasTime
604 % " map2=" % hasTime
605 % " map3=" % hasTime
606 % " sql=" % (if scoresNeeded then "map2" else "map3")
607 % "\n"
608 ) t0 t3 t0 t1 t1 t2 t2 t3
609 pure $ toVersionedWithCount fltrCount tableMap3
610
611
612
613 scoresRecomputeTableNgrams :: forall env err m.
614 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
615 => NodeId -> TabType -> ListId -> m Int
616 scoresRecomputeTableNgrams nId tabType listId = do
617 tableMap <- getNgramsTableMap listId ngramsType
618 _ <- tableMap & v_data %%~ setScores
619 . Map.mapWithKey ngramsElementFromRepo
620
621 pure $ 1
622 where
623 ngramsType = ngramsTypeFromTabType tabType
624
625 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
626 setScores table = do
627 let ngrams_terms = table ^.. each . ne_ngrams
628 occurrences <- getOccByNgramsOnlyFast' nId
629 listId
630 ngramsType
631 ngrams_terms
632 let
633 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
634
635 pure $ table & each %~ setOcc
636
637
638
639
640 -- APIs
641
642 -- TODO: find a better place for the code above, All APIs stay here
643
644 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
645 deriving (Generic, Enum, Bounded, Read, Show)
646
647 instance FromHttpApiData OrderBy
648 where
649 parseUrlPiece "TermAsc" = pure TermAsc
650 parseUrlPiece "TermDesc" = pure TermDesc
651 parseUrlPiece "ScoreAsc" = pure ScoreAsc
652 parseUrlPiece "ScoreDesc" = pure ScoreDesc
653 parseUrlPiece _ = Left "Unexpected value of OrderBy"
654
655
656 instance ToParamSchema OrderBy
657 instance FromJSON OrderBy
658 instance ToJSON OrderBy
659 instance ToSchema OrderBy
660 instance Arbitrary OrderBy
661 where
662 arbitrary = elements [minBound..maxBound]
663
664 needsScores :: Maybe OrderBy -> Bool
665 needsScores (Just ScoreAsc) = True
666 needsScores (Just ScoreDesc) = True
667 needsScores _ = False
668
669 type TableNgramsApiGet = Summary " Table Ngrams API Get"
670 :> QueryParamR "ngramsType" TabType
671 :> QueryParamR "list" ListId
672 :> QueryParamR "limit" Limit
673 :> QueryParam "offset" Offset
674 :> QueryParam "listType" ListType
675 :> QueryParam "minTermSize" MinSize
676 :> QueryParam "maxTermSize" MaxSize
677 :> QueryParam "orderBy" OrderBy
678 :> QueryParam "search" Text
679 :> Get '[JSON] (VersionedWithCount NgramsTable)
680
681 type TableNgramsApiPut = Summary " Table Ngrams API Change"
682 :> QueryParamR "ngramsType" TabType
683 :> QueryParamR "list" ListId
684 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
685 :> Put '[JSON] (Versioned NgramsTablePatch)
686
687 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
688 :> QueryParamR "ngramsType" TabType
689 :> QueryParamR "list" ListId
690 :> "recompute" :> Post '[JSON] Int
691
692 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
693 :> QueryParamR "ngramsType" TabType
694 :> QueryParamR "list" ListId
695 :> Get '[JSON] Version
696
697 type TableNgramsApi = TableNgramsApiGet
698 :<|> TableNgramsApiPut
699 :<|> RecomputeScoresNgramsApiGet
700 :<|> "version" :> TableNgramsApiGetVersion
701 :<|> TableNgramsAsyncApi
702
703 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
704 :> "async"
705 :> "charts"
706 :> "update"
707 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
708
709 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
710 => NodeId
711 -> TabType
712 -> ListId
713 -> Limit
714 -> Maybe Offset
715 -> Maybe ListType
716 -> Maybe MinSize -> Maybe MaxSize
717 -> Maybe OrderBy
718 -> Maybe Text -- full text search
719 -> m (VersionedWithCount NgramsTable)
720 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
721 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
722 where
723 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
724
725
726
727 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
728 => NodeId
729 -> TabType
730 -> ListId
731 -> m Version
732 getTableNgramsVersion _nId _tabType listId = currentVersion listId
733
734
735
736 -- TODO: limit?
737 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
738 -- This line above looks like a waste of computation to finally get only the version.
739 -- See the comment about listNgramsChangedSince.
740
741
742 -- | Text search is deactivated for now for ngrams by doc only
743 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
744 => DocId -> TabType
745 -> ListId -> Limit -> Maybe Offset
746 -> Maybe ListType
747 -> Maybe MinSize -> Maybe MaxSize
748 -> Maybe OrderBy
749 -> Maybe Text -- full text search
750 -> m (VersionedWithCount NgramsTable)
751 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
752 ns <- selectNodesWithUsername NodeList userMaster
753 let ngramsType = ngramsTypeFromTabType tabType
754 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
755 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
756 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
757
758
759
760 apiNgramsTableCorpus :: ( GargServerC env err m
761 )
762 => NodeId -> ServerT TableNgramsApi m
763 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
764 :<|> tableNgramsPut
765 :<|> scoresRecomputeTableNgrams cId
766 :<|> getTableNgramsVersion cId
767 :<|> apiNgramsAsync cId
768
769 apiNgramsTableDoc :: ( GargServerC env err m
770 )
771 => DocId -> ServerT TableNgramsApi m
772 apiNgramsTableDoc dId = getTableNgramsDoc dId
773 :<|> tableNgramsPut
774 :<|> scoresRecomputeTableNgrams dId
775 :<|> getTableNgramsVersion dId
776 :<|> apiNgramsAsync dId
777
778 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
779 apiNgramsAsync _dId =
780 serveJobsAPI $
781 JobFunction $ \i log ->
782 let
783 log' x = do
784 printDebug "tableNgramsPostChartsAsync" x
785 liftBase $ log x
786 in tableNgramsPostChartsAsync i log'
787
788 -- Did the given list of ngrams changed since the given version?
789 -- The returned value is versioned boolean value, meaning that one always retrieve the
790 -- latest version.
791 -- If the given version is negative then one simply receive the latest version and True.
792 -- Using this function is more precise than simply comparing the latest version number
793 -- with the local version number. Indeed there might be no change to this particular list
794 -- and still the version number has changed because of other lists.
795 --
796 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
797 -- * currentVersion: good computation, good bandwidth, bad precision.
798 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
799 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
800 listNgramsChangedSince :: HasNodeStory env err m
801 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
802 listNgramsChangedSince listId ngramsType version
803 | version < 0 =
804 Versioned <$> currentVersion listId <*> pure True
805 | otherwise =
806 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
807
808