]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev' into 141-dev-node-stories-db-optimization
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , initRepo
59
60 , TabType(..)
61
62 , QueryParamR
63 , TODO
64
65 -- Internals
66 , getNgramsTableMap
67 , dumpJsonTableMap
68 , tableNgramsPull
69 , tableNgramsPut
70
71 , Version
72 , Versioned(..)
73 , VersionedWithCount(..)
74 , currentVersion
75 , listNgramsChangedSince
76 , MinSize, MaxSize, OrderBy, NgramsTable
77 , UpdateTableNgramsCharts
78 )
79 where
80
81 import Control.Concurrent
82 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
83 import Control.Monad.Reader
84 import Data.Aeson hiding ((.=))
85 import Data.Either (Either(..))
86 import Data.Foldable
87 import Data.Map.Strict (Map)
88 import Data.Maybe (fromMaybe)
89 import Data.Monoid
90 import Data.Ord (Down(..))
91 import Data.Patch.Class (Action(act), Transformable(..), ours)
92 import Data.Swagger hiding (version, patch)
93 import Data.Text (Text, isInfixOf, unpack, pack)
94 import Data.Text.Lazy.IO as DTL
95 import Formatting (hprint, int, (%))
96 import GHC.Generics (Generic)
97 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
98 import Gargantext.API.Admin.Types (HasSettings)
99 import Gargantext.API.Job
100 import Gargantext.API.Ngrams.Types
101 import Gargantext.API.Prelude
102 import Gargantext.Core.NodeStory
103 import Gargantext.Core.Mail.Types (HasMail)
104 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
105 import Gargantext.API.Ngrams.Tools
106 import Gargantext.Database.Action.Flow.Types
107 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
108 import Gargantext.Database.Admin.Config (userMaster)
109 import Gargantext.Database.Admin.Types.Node (NodeType(..))
110 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
111 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
112 import Gargantext.Database.Query.Table.Node (getNode)
113 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
114 import Gargantext.Database.Query.Table.Node.Select
115 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
116 import Gargantext.Prelude hiding (log)
117 import Gargantext.Prelude.Clock (hasTime, getTime)
118 import Prelude (error)
119 import Servant hiding (Patch)
120 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
121 import System.IO (stderr)
122 import Test.QuickCheck (elements)
123 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
124 import qualified Data.Aeson.Text as DAT
125 import qualified Data.List as List
126 import qualified Data.Map.Strict as Map
127 import qualified Data.Map.Strict.Patch as PM
128 import qualified Data.Set as S
129 import qualified Data.Set as Set
130 import qualified Gargantext.API.Metrics as Metrics
131 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
132
133 {-
134 -- TODO sequences of modifications (Patchs)
135 type NgramsIdPatch = Patch NgramsId NgramsPatch
136
137 ngramsPatch :: Int -> NgramsPatch
138 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
139
140 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
141 toEdit n p = Edit n p
142 ngramsIdPatch :: Patch NgramsId NgramsPatch
143 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
144 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
145 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
146 ]
147
148 -- applyPatchBack :: Patch -> IO Patch
149 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
150 -}
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154
155 {-
156 -- TODO: Replace.old is ignored which means that if the current list
157 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
158 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
159 -- However this should not happen in non conflicting situations.
160 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
161 mkListsUpdate nt patches =
162 [ (ngramsTypeId nt, ng, listTypeId lt)
163 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
164 , lt <- patch ^.. patch_list . new
165 ]
166
167 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
168 -> NgramsType
169 -> NgramsTablePatch
170 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
171 mkChildrenGroups addOrRem nt patches =
172 [ (ngramsTypeId nt, parent, child)
173 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
174 , child <- patch ^.. patch_children . to addOrRem . folded
175 ]
176 -}
177
178 ------------------------------------------------------------------------
179
180 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
181 => m ()
182 saveNodeStory = liftBase =<< view hasNodeStorySaver
183
184
185 listTypeConflictResolution :: ListType -> ListType -> ListType
186 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
187
188
189 ngramsStatePatchConflictResolution
190 :: TableNgrams.NgramsType
191 -> NgramsTerm
192 -> ConflictResolutionNgramsPatch
193 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
194 = (ours, (const ours, ours), (False, False))
195 -- (False, False) mean here that Mod has always priority.
196 -- (True, False) <- would mean priority to the left (same as ours).
197 -- undefined {- TODO think this through -}, listTypeConflictResolution)
198
199
200
201
202 -- Current state:
203 -- Insertions are not considered as patches,
204 -- they do not extend history,
205 -- they do not bump version.
206 insertNewOnly :: a -> Maybe b -> a
207 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
208 -- TODO error handling
209
210 {- unused
211 -- TODO refactor with putListNgrams
212 copyListNgrams :: RepoCmdM env err m
213 => NodeId -> NodeId -> NgramsType
214 -> m ()
215 copyListNgrams srcListId dstListId ngramsType = do
216 var <- view repoVar
217 liftBase $ modifyMVar_ var $
218 pure . (r_state . at ngramsType %~ (Just . f . something))
219 saveNodeStory
220 where
221 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
222 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
223
224 -- TODO refactor with putListNgrams
225 -- The list must be non-empty!
226 -- The added ngrams must be non-existent!
227 addListNgrams :: RepoCmdM env err m
228 => NodeId -> NgramsType
229 -> [NgramsElement] -> m ()
230 addListNgrams listId ngramsType nes = do
231 var <- view repoVar
232 liftBase $ modifyMVar_ var $
233 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
234 saveNodeStory
235 where
236 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
237 -}
238
239 -- | TODO: incr the Version number
240 -- && should use patch
241 -- UNSAFE
242
243 setListNgrams :: HasNodeStory env err m
244 => NodeId
245 -> TableNgrams.NgramsType
246 -> Map NgramsTerm NgramsRepoElement
247 -> m ()
248 setListNgrams listId ngramsType ns = do
249 -- printDebug "[setListNgrams]" (listId, ngramsType)
250 getter <- view hasNodeStory
251 var <- liftBase $ (getter ^. nse_getter) [listId]
252 liftBase $ modifyMVar_ var $
253 pure . ( unNodeStory
254 . at listId . _Just
255 . a_state
256 . at ngramsType
257 .~ Just ns
258 )
259 saveNodeStory
260
261
262 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
263 newNgramsFromNgramsStatePatch p =
264 [ text2ngrams (unNgramsTerm n)
265 | (n,np) <- p ^.. _PatchMap
266 -- . each . _PatchMap
267 . each . _NgramsTablePatch
268 . _PatchMap . ifolded . withIndex
269 , _ <- np ^.. patch_new . _Just
270 ]
271
272
273
274
275 commitStatePatch :: (HasNodeStory env err m, HasMail env)
276 => ListId
277 -> Versioned NgramsStatePatch'
278 -> m (Versioned NgramsStatePatch')
279 commitStatePatch listId (Versioned _p_version p) = do
280 -- printDebug "[commitStatePatch]" listId
281 var <- getNodeStoryVar [listId]
282 vq' <- liftBase $ modifyMVar var $ \ns -> do
283 let
284 a = ns ^. unNodeStory . at listId . _Just
285 -- apply patches from version p_version to a ^. a_version
286 -- TODO Check this
287 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
288 q = mconcat $ a ^. a_history
289 (p', q') = transformWith ngramsStatePatchConflictResolution p q
290 a' = a & a_version +~ 1
291 & a_state %~ act p'
292 & a_history %~ (p' :)
293
294 {-
295 -- Ideally we would like to check these properties. However:
296 -- * They should be checked only to debug the code. The client data
297 -- should be able to trigger these.
298 -- * What kind of error should they throw (we are in IO here)?
299 -- * Should we keep modifyMVar?
300 -- * Should we throw the validation in an Exception, catch it around
301 -- modifyMVar and throw it back as an Error?
302 assertValid $ transformable p q
303 assertValid $ applicable p' (r ^. r_state)
304 -}
305 printDebug "[commitStatePatch] a version" (a ^. a_version)
306 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
307 pure ( ns & unNodeStory . at listId .~ (Just a')
308 , Versioned (a' ^. a_version) q'
309 )
310 saveNodeStory
311 -- Save new ngrams
312 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
313
314 pure vq'
315
316
317
318 -- This is a special case of tableNgramsPut where the input patch is empty.
319 tableNgramsPull :: HasNodeStory env err m
320 => ListId
321 -> TableNgrams.NgramsType
322 -> Version
323 -> m (Versioned NgramsTablePatch)
324 tableNgramsPull listId ngramsType p_version = do
325 printDebug "[tableNgramsPull]" (listId, ngramsType)
326 var <- getNodeStoryVar [listId]
327 r <- liftBase $ readMVar var
328
329 let
330 a = r ^. unNodeStory . at listId . _Just
331 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
332 q_table = q ^. _PatchMap . at ngramsType . _Just
333
334 pure (Versioned (a ^. a_version) q_table)
335
336
337
338
339 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
340 -- Apply the given patch to the DB and returns the patch to be applied on the
341 -- client.
342 -- TODO-ACCESS check
343 tableNgramsPut :: ( HasNodeStory env err m
344 , HasInvalidError err
345 , HasSettings env
346 , HasMail env
347 )
348 => TabType
349 -> ListId
350 -> Versioned NgramsTablePatch
351 -> m (Versioned NgramsTablePatch)
352 tableNgramsPut tabType listId (Versioned p_version p_table)
353 | p_table == mempty = do
354 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
355 let ngramsType = ngramsTypeFromTabType tabType
356 tableNgramsPull listId ngramsType p_version
357
358 | otherwise = do
359 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
360 let ngramsType = ngramsTypeFromTabType tabType
361 (p, p_validity) = PM.singleton ngramsType p_table
362
363 assertValid p_validity
364
365 ret <- commitStatePatch listId (Versioned p_version p)
366 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
367
368 pure ret
369
370
371
372 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
373 , FlowCmdM env err m
374 , HasNodeError err
375 , HasSettings env
376 )
377 => UpdateTableNgramsCharts
378 -> (JobLog -> m ())
379 -> m JobLog
380 tableNgramsPostChartsAsync utn logStatus = do
381 let tabType = utn ^. utn_tab_type
382 let listId = utn ^. utn_list_id
383
384 node <- getNode listId
385 let nId = node ^. node_id
386 _uId = node ^. node_user_id
387 mCId = node ^. node_parent_id
388
389 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
390 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
391
392 case mCId of
393 Nothing -> do
394 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
395 pure $ jobLogFail $ jobLogInit 1
396 Just cId -> do
397 case tabType of
398 Authors -> do
399 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
400 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
401 logRef
402 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
403 logRefSuccess
404
405 getRef
406 Institutes -> do
407 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
408 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
409 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
410 logRef
411 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
412 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
413 logRefSuccess
414 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
415 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
416 logRefSuccess
417 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
418 logRefSuccess
419
420 getRef
421 Sources -> do
422 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
423 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
424 logRef
425 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
426 logRefSuccess
427
428 getRef
429 Terms -> do
430 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
431 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
432 logRef
433 {-
434 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
435 logRefSuccess
436 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
437 logRefSuccess
438 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
439 logRefSuccess
440 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
441 logRefSuccess
442 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
443 logRefSuccess
444 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
445 -}
446 logRefSuccess
447
448 getRef
449 _ -> do
450 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
451 pure $ jobLogFail $ jobLogInit 1
452
453 {-
454 { _ne_list :: ListType
455 If we merge the parents/children we can potentially create cycles!
456 , _ne_parent :: Maybe NgramsTerm
457 , _ne_children :: MSet NgramsTerm
458 }
459 -}
460
461 getNgramsTableMap :: HasNodeStory env err m
462 => NodeId
463 -> TableNgrams.NgramsType
464 -> m (Versioned NgramsTableMap)
465 getNgramsTableMap nodeId ngramsType = do
466 v <- getNodeStoryVar [nodeId]
467 repo <- liftBase $ readMVar v
468 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
469 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
470
471
472 dumpJsonTableMap :: HasNodeStory env err m
473 => Text
474 -> NodeId
475 -> TableNgrams.NgramsType
476 -> m ()
477 dumpJsonTableMap fpath nodeId ngramsType = do
478 m <- getNgramsTableMap nodeId ngramsType
479 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
480 pure ()
481
482
483 type MinSize = Int
484 type MaxSize = Int
485
486 -- | TODO Errors management
487 -- TODO: polymorphic for Annuaire or Corpus or ...
488 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
489 -- TODO: should take only one ListId
490
491
492 getTableNgrams :: forall env err m.
493 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
494 => NodeType -> NodeId -> TabType
495 -> ListId -> Limit -> Maybe Offset
496 -> Maybe ListType
497 -> Maybe MinSize -> Maybe MaxSize
498 -> Maybe OrderBy
499 -> (NgramsTerm -> Bool)
500 -> m (VersionedWithCount NgramsTable)
501 getTableNgrams _nType nId tabType listId limit_ offset
502 listType minSize maxSize orderBy searchQuery = do
503
504 t0 <- getTime
505 -- lIds <- selectNodesWithUsername NodeList userMaster
506 let
507 ngramsType = ngramsTypeFromTabType tabType
508 offset' = maybe 0 identity offset
509 listType' = maybe (const True) (==) listType
510 minSize' = maybe (const True) (<=) minSize
511 maxSize' = maybe (const True) (>=) maxSize
512
513 selected_node n = minSize' s
514 && maxSize' s
515 && searchQuery (n ^. ne_ngrams)
516 && listType' (n ^. ne_list)
517 where
518 s = n ^. ne_size
519
520 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
521
522 ---------------------------------------
523 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
524 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
525 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
526 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
527 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
528
529 ---------------------------------------
530 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
531 filteredNodes tableMap = rootOf <$> list & filter selected_node
532 where
533 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
534 (tableMap ^. at r)
535 )
536 (ne ^. ne_root)
537 list = tableMap ^.. each
538
539 ---------------------------------------
540 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
541 selectAndPaginate tableMap = roots <> inners
542 where
543 list = tableMap ^.. each
544 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
545 (tableMap ^. at r)
546 )
547 (ne ^. ne_root)
548 selected_nodes = list & take limit_
549 . drop offset'
550 . filter selected_node
551 . sortOnOrder orderBy
552 roots = rootOf <$> selected_nodes
553 rootsSet = Set.fromList (_ne_ngrams <$> roots)
554 inners = list & filter (selected_inner rootsSet)
555
556 ---------------------------------------
557 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
558 setScores False table = pure table
559 setScores True table = do
560 let ngrams_terms = table ^.. each . ne_ngrams
561 -- printDebug "ngrams_terms" ngrams_terms
562 t1 <- getTime
563 occurrences <- getOccByNgramsOnlyFast' nId
564 listId
565 ngramsType
566 ngrams_terms
567 --printDebug "occurrences" occurrences
568 t2 <- getTime
569 liftBase $ hprint stderr
570 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
571 (length ngrams_terms) t1 t2
572 let
573 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
574
575 pure $ table & each %~ setOcc
576 ---------------------------------------
577
578 -- lists <- catMaybes <$> listsWith userMaster
579 -- trace (show lists) $
580 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
581
582
583 let scoresNeeded = needsScores orderBy
584 tableMap1 <- getNgramsTableMap listId ngramsType
585 t1 <- getTime
586
587 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
588 . Map.mapWithKey ngramsElementFromRepo
589
590 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
591 . filteredNodes
592
593 let fltrCount = length $ fltr ^. v_data . _NgramsTable
594
595 t2 <- getTime
596 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
597 . setScores (not scoresNeeded)
598 . selectAndPaginate
599 t3 <- getTime
600 liftBase $ hprint stderr
601 ("getTableNgrams total=" % hasTime
602 % " map1=" % hasTime
603 % " map2=" % hasTime
604 % " map3=" % hasTime
605 % " sql=" % (if scoresNeeded then "map2" else "map3")
606 % "\n"
607 ) t0 t3 t0 t1 t1 t2 t2 t3
608 pure $ toVersionedWithCount fltrCount tableMap3
609
610
611
612 scoresRecomputeTableNgrams :: forall env err m.
613 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
614 => NodeId -> TabType -> ListId -> m Int
615 scoresRecomputeTableNgrams nId tabType listId = do
616 tableMap <- getNgramsTableMap listId ngramsType
617 _ <- tableMap & v_data %%~ setScores
618 . Map.mapWithKey ngramsElementFromRepo
619
620 pure $ 1
621 where
622 ngramsType = ngramsTypeFromTabType tabType
623
624 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
625 setScores table = do
626 let ngrams_terms = table ^.. each . ne_ngrams
627 occurrences <- getOccByNgramsOnlyFast' nId
628 listId
629 ngramsType
630 ngrams_terms
631 let
632 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
633
634 pure $ table & each %~ setOcc
635
636
637
638
639 -- APIs
640
641 -- TODO: find a better place for the code above, All APIs stay here
642
643 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
644 deriving (Generic, Enum, Bounded, Read, Show)
645
646 instance FromHttpApiData OrderBy
647 where
648 parseUrlPiece "TermAsc" = pure TermAsc
649 parseUrlPiece "TermDesc" = pure TermDesc
650 parseUrlPiece "ScoreAsc" = pure ScoreAsc
651 parseUrlPiece "ScoreDesc" = pure ScoreDesc
652 parseUrlPiece _ = Left "Unexpected value of OrderBy"
653
654 instance ToHttpApiData OrderBy where
655 toUrlPiece = pack . show
656
657 instance ToParamSchema OrderBy
658 instance FromJSON OrderBy
659 instance ToJSON OrderBy
660 instance ToSchema OrderBy
661 instance Arbitrary OrderBy
662 where
663 arbitrary = elements [minBound..maxBound]
664
665 needsScores :: Maybe OrderBy -> Bool
666 needsScores (Just ScoreAsc) = True
667 needsScores (Just ScoreDesc) = True
668 needsScores _ = False
669
670 type TableNgramsApiGet = Summary " Table Ngrams API Get"
671 :> QueryParamR "ngramsType" TabType
672 :> QueryParamR "list" ListId
673 :> QueryParamR "limit" Limit
674 :> QueryParam "offset" Offset
675 :> QueryParam "listType" ListType
676 :> QueryParam "minTermSize" MinSize
677 :> QueryParam "maxTermSize" MaxSize
678 :> QueryParam "orderBy" OrderBy
679 :> QueryParam "search" Text
680 :> Get '[JSON] (VersionedWithCount NgramsTable)
681
682 type TableNgramsApiPut = Summary " Table Ngrams API Change"
683 :> QueryParamR "ngramsType" TabType
684 :> QueryParamR "list" ListId
685 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
686 :> Put '[JSON] (Versioned NgramsTablePatch)
687
688 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
689 :> QueryParamR "ngramsType" TabType
690 :> QueryParamR "list" ListId
691 :> "recompute" :> Post '[JSON] Int
692
693 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
694 :> QueryParamR "ngramsType" TabType
695 :> QueryParamR "list" ListId
696 :> Get '[JSON] Version
697
698 type TableNgramsApi = TableNgramsApiGet
699 :<|> TableNgramsApiPut
700 :<|> RecomputeScoresNgramsApiGet
701 :<|> "version" :> TableNgramsApiGetVersion
702 :<|> TableNgramsAsyncApi
703
704 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
705 :> "async"
706 :> "charts"
707 :> "update"
708 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
709
710 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
711 => NodeId
712 -> TabType
713 -> ListId
714 -> Limit
715 -> Maybe Offset
716 -> Maybe ListType
717 -> Maybe MinSize -> Maybe MaxSize
718 -> Maybe OrderBy
719 -> Maybe Text -- full text search
720 -> m (VersionedWithCount NgramsTable)
721 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
722 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
723 where
724 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
725
726
727
728 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
729 => NodeId
730 -> TabType
731 -> ListId
732 -> m Version
733 getTableNgramsVersion _nId _tabType listId = currentVersion listId
734
735
736
737 -- TODO: limit?
738 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
739 -- This line above looks like a waste of computation to finally get only the version.
740 -- See the comment about listNgramsChangedSince.
741
742
743 -- | Text search is deactivated for now for ngrams by doc only
744 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
745 => DocId -> TabType
746 -> ListId -> Limit -> Maybe Offset
747 -> Maybe ListType
748 -> Maybe MinSize -> Maybe MaxSize
749 -> Maybe OrderBy
750 -> Maybe Text -- full text search
751 -> m (VersionedWithCount NgramsTable)
752 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
753 ns <- selectNodesWithUsername NodeList userMaster
754 let ngramsType = ngramsTypeFromTabType tabType
755 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
756 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
757 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
758
759
760
761 apiNgramsTableCorpus :: ( GargServerC env err m
762 )
763 => NodeId -> ServerT TableNgramsApi m
764 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
765 :<|> tableNgramsPut
766 :<|> scoresRecomputeTableNgrams cId
767 :<|> getTableNgramsVersion cId
768 :<|> apiNgramsAsync cId
769
770 apiNgramsTableDoc :: ( GargServerC env err m
771 )
772 => DocId -> ServerT TableNgramsApi m
773 apiNgramsTableDoc dId = getTableNgramsDoc dId
774 :<|> tableNgramsPut
775 :<|> scoresRecomputeTableNgrams dId
776 :<|> getTableNgramsVersion dId
777 :<|> apiNgramsAsync dId
778
779 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
780 apiNgramsAsync _dId =
781 serveJobsAPI $
782 JobFunction $ \i log ->
783 let
784 log' x = do
785 printDebug "tableNgramsPostChartsAsync" x
786 liftBase $ log x
787 in tableNgramsPostChartsAsync i log'
788
789 -- Did the given list of ngrams changed since the given version?
790 -- The returned value is versioned boolean value, meaning that one always retrieve the
791 -- latest version.
792 -- If the given version is negative then one simply receive the latest version and True.
793 -- Using this function is more precise than simply comparing the latest version number
794 -- with the local version number. Indeed there might be no change to this particular list
795 -- and still the version number has changed because of other lists.
796 --
797 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
798 -- * currentVersion: good computation, good bandwidth, bad precision.
799 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
800 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
801 listNgramsChangedSince :: HasNodeStory env err m
802 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
803 listNgramsChangedSince listId ngramsType version
804 | version < 0 =
805 Versioned <$> currentVersion listId <*> pure True
806 | otherwise =
807 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)