]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FIX] temp resolution of commitpatch
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , VersionedWithCount(..)
82 , currentVersion
83 , listNgramsChangedSince
84 )
85 where
86
87 import Control.Concurrent
88 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
89 import Control.Monad.Reader
90 import Data.Aeson hiding ((.=))
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import Data.Map.Strict (Map)
94 import Data.Maybe (fromMaybe)
95 import Data.Monoid
96 import Data.Ord (Down(..))
97 import Data.Patch.Class (Action(act), Transformable(..), ours)
98 import Data.Swagger hiding (version, patch)
99 import Data.Text (Text, isInfixOf, unpack)
100 import Data.Text.Lazy.IO as DTL
101 import Formatting (hprint, int, (%))
102 import GHC.Generics (Generic)
103 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
104 import Gargantext.API.Admin.Types (HasSettings)
105 import Gargantext.API.Job
106 import Gargantext.API.Ngrams.Types
107 import Gargantext.API.Prelude
108 import Gargantext.Core.NodeStory
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Core.Utils (something)
112 import Gargantext.Database.Action.Flow.Types
113 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
114 import Gargantext.Database.Admin.Config (userMaster)
115 import Gargantext.Database.Admin.Types.Node (NodeType(..))
116 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
117 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
118 import Gargantext.Database.Query.Table.Node (getNode)
119 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
120 import Gargantext.Database.Query.Table.Node.Select
121 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
122 import Gargantext.Prelude hiding (log)
123 import Gargantext.Prelude.Clock (hasTime, getTime)
124 import Prelude (error)
125 import Servant hiding (Patch)
126 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
127 import System.IO (stderr)
128 import Test.QuickCheck (elements)
129 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
130 import qualified Data.Aeson.Text as DAT
131 import qualified Data.List as List
132 import qualified Data.Map.Strict as Map
133 import qualified Data.Map.Strict.Patch as PM
134 import qualified Data.Set as S
135 import qualified Data.Set as Set
136 import qualified Gargantext.API.Metrics as Metrics
137 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
138
139 {-
140 -- TODO sequences of modifications (Patchs)
141 type NgramsIdPatch = Patch NgramsId NgramsPatch
142
143 ngramsPatch :: Int -> NgramsPatch
144 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
145
146 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
147 toEdit n p = Edit n p
148 ngramsIdPatch :: Patch NgramsId NgramsPatch
149 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
150 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
151 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
152 ]
153
154 -- applyPatchBack :: Patch -> IO Patch
155 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
156 -}
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
159 ------------------------------------------------------------------------
160
161 {-
162 -- TODO: Replace.old is ignored which means that if the current list
163 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
164 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
165 -- However this should not happen in non conflicting situations.
166 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
167 mkListsUpdate nt patches =
168 [ (ngramsTypeId nt, ng, listTypeId lt)
169 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
170 , lt <- patch ^.. patch_list . new
171 ]
172
173 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
174 -> NgramsType
175 -> NgramsTablePatch
176 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
177 mkChildrenGroups addOrRem nt patches =
178 [ (ngramsTypeId nt, parent, child)
179 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
180 , child <- patch ^.. patch_children . to addOrRem . folded
181 ]
182 -}
183
184 ------------------------------------------------------------------------
185
186 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
187 => m ()
188 saveRepo = liftBase =<< view repoSaver
189 saveRepo' :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
190 => m ()
191 saveRepo' = liftBase =<< view hasNodeStorySaver
192
193
194
195 listTypeConflictResolution :: ListType -> ListType -> ListType
196 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
197
198 ngramsStatePatchConflictResolution
199 :: TableNgrams.NgramsType
200 -> NodeId
201 -> NgramsTerm
202 -> ConflictResolutionNgramsPatch
203 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
204 = (ours, (const ours, ours), (False, False))
205 -- (False, False) mean here that Mod has always priority.
206 -- (True, False) <- would mean priority to the left (same as ours).
207 -- undefined {- TODO think this through -}, listTypeConflictResolution)
208
209 ngramsStatePatchConflictResolution'
210 :: TableNgrams.NgramsType
211 -> NgramsTerm
212 -> ConflictResolutionNgramsPatch
213 ngramsStatePatchConflictResolution' _ngramsType _ngramsTerm
214 = (ours, (const ours, ours), (False, False))
215 -- (False, False) mean here that Mod has always priority.
216 -- (True, False) <- would mean priority to the left (same as ours).
217 -- undefined {- TODO think this through -}, listTypeConflictResolution)
218
219
220
221
222 -- Current state:
223 -- Insertions are not considered as patches,
224 -- they do not extend history,
225 -- they do not bump version.
226 insertNewOnly :: a -> Maybe b -> a
227 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
228 -- TODO error handling
229
230 {- unused
231 -- TODO refactor with putListNgrams
232 copyListNgrams :: RepoCmdM env err m
233 => NodeId -> NodeId -> NgramsType
234 -> m ()
235 copyListNgrams srcListId dstListId ngramsType = do
236 var <- view repoVar
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType %~ (Just . f . something))
239 saveRepo
240 where
241 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
242 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
243
244 -- TODO refactor with putListNgrams
245 -- The list must be non-empty!
246 -- The added ngrams must be non-existent!
247 addListNgrams :: RepoCmdM env err m
248 => NodeId -> NgramsType
249 -> [NgramsElement] -> m ()
250 addListNgrams listId ngramsType nes = do
251 var <- view repoVar
252 liftBase $ modifyMVar_ var $
253 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
254 saveRepo
255 where
256 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
257 -}
258
259 -- | TODO: incr the Version number
260 -- && should use patch
261 -- UNSAFE
262
263 setListNgrams :: RepoCmdM env err m
264 => NodeId
265 -> TableNgrams.NgramsType
266 -> Map NgramsTerm NgramsRepoElement
267 -> m ()
268 setListNgrams listId ngramsType ns = do
269 var <- view repoVar
270 liftBase $ modifyMVar_ var $
271 pure . ( r_state
272 . at ngramsType
273 %~ Just . (at listId .~ Just ns) . something
274 )
275 printDebug "List modified" NodeList
276 saveRepo
277 setListNgrams' :: HasNodeStory env err m
278 => NodeId
279 -> TableNgrams.NgramsType
280 -> Map NgramsTerm NgramsRepoElement
281 -> m ()
282 setListNgrams' listId ngramsType ns = do
283 getter <- view hasNodeStory
284 var <- liftBase $ (getter ^. nse_getter) listId
285 liftBase $ modifyMVar_ var $
286 pure . ( unNodeStory
287 . at listId . _Just
288 . a_state
289 . at ngramsType
290 .~ Just ns
291 )
292 saveRepo'
293
294
295 currentVersion :: RepoCmdM env err m
296 => m Version
297 currentVersion = do
298 var <- view repoVar
299 r <- liftBase $ readMVar var
300 pure $ r ^. r_version
301 currentVersion' :: HasNodeStory env err m
302 => ListId -> m Version
303 currentVersion' listId = do
304 nls <- getRepo' [listId]
305 pure $ nls ^. unNodeStory . at listId . _Just . a_version
306
307
308
309
310 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
311 newNgramsFromNgramsStatePatch p =
312 [ text2ngrams (unNgramsTerm n)
313 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
314 , _ <- np ^.. patch_new . _Just
315 ]
316 newNgramsFromNgramsStatePatch' :: NgramsStatePatch' -> [Ngrams]
317 newNgramsFromNgramsStatePatch' p =
318 [ text2ngrams (unNgramsTerm n)
319 | (n,np) <- p ^.. _PatchMap
320 -- . each . _PatchMap
321 . each . _NgramsTablePatch
322 . _PatchMap . ifolded . withIndex
323 , _ <- np ^.. patch_new . _Just
324 ]
325
326
327 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
328 commitStatePatch :: RepoCmdM env err m
329 => Versioned NgramsStatePatch
330 -> m (Versioned NgramsStatePatch)
331 commitStatePatch (Versioned p_version p) = do
332 var <- view repoVar
333 vq' <- liftBase $ modifyMVar var $ \r -> do
334 let
335 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
336 (p', q') = transformWith ngramsStatePatchConflictResolution p q
337 r' = r & r_version +~ 1
338 & r_state %~ act p'
339 & r_history %~ (p' :)
340 {-
341 -- Ideally we would like to check these properties. However:
342 -- * They should be checked only to debug the code. The client data
343 -- should be able to trigger these.
344 -- * What kind of error should they throw (we are in IO here)?
345 -- * Should we keep modifyMVar?
346 -- * Should we throw the validation in an Exception, catch it around
347 -- modifyMVar and throw it back as an Error?
348 assertValid $ transformable p q
349 assertValid $ applicable p' (r ^. r_state)
350 -}
351 pure (r', Versioned (r' ^. r_version) q')
352
353 saveRepo
354
355 -- Save new ngrams
356 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
357
358 pure vq'
359
360 commitStatePatch' :: HasNodeStory env err m
361 => ListId
362 -> Versioned NgramsStatePatch'
363 -> m (Versioned NgramsStatePatch')
364 commitStatePatch' listId (Versioned p_version p) = do
365 var <- getRepoVar listId
366 vq' <- liftBase $ modifyMVar var $ \ns -> do
367 let
368 a = ns ^. unNodeStory . at listId . _Just
369 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
370 (p', q') = transformWith ngramsStatePatchConflictResolution' p q
371 a' = a & a_version +~ 1
372 & a_state %~ act p'
373 & a_history %~ (p' :)
374 pure ( ns & unNodeStory . at listId .~ (Just a')
375 , Versioned (a' ^. a_version) q'
376 )
377 saveRepo'
378 -- Save new ngrams
379 _ <- insertNgrams (newNgramsFromNgramsStatePatch' p)
380
381 pure $ vq'
382
383
384
385 -- This is a special case of tableNgramsPut where the input patch is empty.
386 tableNgramsPull :: RepoCmdM env err m
387 => ListId
388 -> TableNgrams.NgramsType
389 -> Version
390 -> m (Versioned NgramsTablePatch)
391 tableNgramsPull listId ngramsType p_version = do
392 var <- view repoVar
393 r <- liftBase $ readMVar var
394
395 let
396 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
397 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
398
399 pure (Versioned (r ^. r_version) q_table)
400
401 -- Apply the given patch to the DB and returns the patch to be applied on the
402 -- client.
403 -- TODO-ACCESS check
404 tableNgramsPut :: ( FlowCmdM env err m
405 , HasSettings env
406 )
407 => TabType
408 -> ListId
409 -> Versioned NgramsTablePatch
410 -> m (Versioned NgramsTablePatch)
411 tableNgramsPut tabType listId (Versioned p_version p_table)
412 | p_table == mempty = do
413 let ngramsType = ngramsTypeFromTabType tabType
414 tableNgramsPull listId ngramsType p_version
415
416 | otherwise = do
417 let ngramsType = ngramsTypeFromTabType tabType
418 (p0, p0_validity) = PM.singleton listId p_table
419 (p, p_validity) = PM.singleton ngramsType p0
420
421 assertValid p0_validity
422 assertValid p_validity
423
424 ret <- commitStatePatch (Versioned p_version p)
425 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
426
427 pure ret
428
429
430 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
431 , HasNodeError err
432 , HasSettings env
433 )
434 => UpdateTableNgramsCharts
435 -> (JobLog -> m ())
436 -> m JobLog
437 tableNgramsPostChartsAsync utn logStatus = do
438 let tabType = utn ^. utn_tab_type
439 let listId = utn ^. utn_list_id
440
441 node <- getNode listId
442 let nId = node ^. node_id
443 _uId = node ^. node_user_id
444 mCId = node ^. node_parent_id
445
446 printDebug "[tableNgramsPut] tabType" tabType
447 printDebug "[tableNgramsPut] listId" listId
448
449 case mCId of
450 Nothing -> do
451 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
452 pure $ jobLogFail $ jobLogInit 1
453 Just cId -> do
454 case tabType of
455 Authors -> do
456 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
457 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
458 logRef
459 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
460 logRefSuccess
461
462 getRef
463 Institutes -> do
464 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
465 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
466 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
467 logRef
468 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
469 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
470 logRefSuccess
471 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
472 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
473 logRefSuccess
474 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
475 logRefSuccess
476
477 getRef
478 Sources -> do
479 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
480 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
481 logRef
482 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
483 logRefSuccess
484
485 getRef
486 Terms -> do
487 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
488 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
489 logRef
490 {-
491 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
492 logRefSuccess
493 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
494 logRefSuccess
495 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
496 logRefSuccess
497 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
498 logRefSuccess
499 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
500 logRefSuccess
501 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
502 -}
503 logRefSuccess
504
505 getRef
506 _ -> do
507 printDebug "[tableNgramsPut] no update for tabType = " tabType
508 pure $ jobLogFail $ jobLogInit 1
509
510 {-
511 { _ne_list :: ListType
512 If we merge the parents/children we can potentially create cycles!
513 , _ne_parent :: Maybe NgramsTerm
514 , _ne_children :: MSet NgramsTerm
515 }
516 -}
517
518 getNgramsTableMap :: RepoCmdM env err m
519 => NodeId
520 -> TableNgrams.NgramsType
521 -> m (Versioned NgramsTableMap)
522 getNgramsTableMap nodeId ngramsType = do
523 v <- view repoVar
524 repo <- liftBase $ readMVar v
525 pure $ Versioned (repo ^. r_version)
526 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
527
528 dumpJsonTableMap :: RepoCmdM env err m
529 => Text
530 -> NodeId
531 -> TableNgrams.NgramsType
532 -> m ()
533 dumpJsonTableMap fpath nodeId ngramsType = do
534 m <- getNgramsTableMap nodeId ngramsType
535 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
536 pure ()
537
538 type MinSize = Int
539 type MaxSize = Int
540
541 -- | TODO Errors management
542 -- TODO: polymorphic for Annuaire or Corpus or ...
543 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
544 -- TODO: should take only one ListId
545
546
547 getTableNgrams :: forall env err m.
548 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
549 => NodeType -> NodeId -> TabType
550 -> ListId -> Limit -> Maybe Offset
551 -> Maybe ListType
552 -> Maybe MinSize -> Maybe MaxSize
553 -> Maybe OrderBy
554 -> (NgramsTerm -> Bool)
555 -> m (VersionedWithCount NgramsTable)
556 getTableNgrams _nType nId tabType listId limit_ offset
557 listType minSize maxSize orderBy searchQuery = do
558
559 t0 <- getTime
560 -- lIds <- selectNodesWithUsername NodeList userMaster
561 let
562 ngramsType = ngramsTypeFromTabType tabType
563 offset' = maybe 0 identity offset
564 listType' = maybe (const True) (==) listType
565 minSize' = maybe (const True) (<=) minSize
566 maxSize' = maybe (const True) (>=) maxSize
567
568 selected_node n = minSize' s
569 && maxSize' s
570 && searchQuery (n ^. ne_ngrams)
571 && listType' (n ^. ne_list)
572 where
573 s = n ^. ne_size
574
575 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
576
577 ---------------------------------------
578 sortOnOrder Nothing = identity
579 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
580 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
581 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
582 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
583
584 ---------------------------------------
585
586 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
587 filteredNodes tableMap = rootOf <$> list & filter selected_node
588 where
589 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
590 (tableMap ^. at r)
591 )
592 (ne ^. ne_root)
593 list = tableMap ^.. each
594
595 ---------------------------------------
596 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
597 selectAndPaginate tableMap = roots <> inners
598 where
599 list = tableMap ^.. each
600 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
601 (tableMap ^. at r)
602 )
603 (ne ^. ne_root)
604 selected_nodes = list & take limit_
605 . drop offset'
606 . filter selected_node
607 . sortOnOrder orderBy
608 roots = rootOf <$> selected_nodes
609 rootsSet = Set.fromList (_ne_ngrams <$> roots)
610 inners = list & filter (selected_inner rootsSet)
611
612 ---------------------------------------
613 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
614 setScores False table = pure table
615 setScores True table = do
616 let ngrams_terms = table ^.. each . ne_ngrams
617 t1 <- getTime
618 occurrences <- getOccByNgramsOnlyFast' nId
619 listId
620 ngramsType
621 ngrams_terms
622 t2 <- getTime
623 liftBase $ hprint stderr
624 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
625 (length ngrams_terms) t1 t2
626 {-
627 occurrences <- getOccByNgramsOnlySlow nType nId
628 (lIds <> [listId])
629 ngramsType
630 ngrams_terms
631 -}
632 let
633 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
634
635 pure $ table & each %~ setOcc
636 ---------------------------------------
637
638 -- lists <- catMaybes <$> listsWith userMaster
639 -- trace (show lists) $
640 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
641
642
643 let scoresNeeded = needsScores orderBy
644 tableMap1 <- getNgramsTableMap listId ngramsType
645 t1 <- getTime
646 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
647 . Map.mapWithKey ngramsElementFromRepo
648
649 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
650 . filteredNodes
651 let fltrCount = length $ fltr ^. v_data . _NgramsTable
652
653 t2 <- getTime
654 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
655 . setScores (not scoresNeeded)
656 . selectAndPaginate
657 t3 <- getTime
658 liftBase $ hprint stderr
659 ("getTableNgrams total=" % hasTime
660 % " map1=" % hasTime
661 % " map2=" % hasTime
662 % " map3=" % hasTime
663 % " sql=" % (if scoresNeeded then "map2" else "map3")
664 % "\n"
665 ) t0 t3 t0 t1 t1 t2 t2 t3
666 pure $ toVersionedWithCount fltrCount tableMap3
667
668
669 scoresRecomputeTableNgrams :: forall env err m.
670 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
671 => NodeId -> TabType -> ListId -> m Int
672 scoresRecomputeTableNgrams nId tabType listId = do
673 tableMap <- getNgramsTableMap listId ngramsType
674 _ <- tableMap & v_data %%~ setScores
675 . Map.mapWithKey ngramsElementFromRepo
676
677 pure $ 1
678 where
679 ngramsType = ngramsTypeFromTabType tabType
680
681 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
682 setScores table = do
683 let ngrams_terms = table ^.. each . ne_ngrams
684 occurrences <- getOccByNgramsOnlyFast' nId
685 listId
686 ngramsType
687 ngrams_terms
688 let
689 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
690
691 pure $ table & each %~ setOcc
692
693
694
695 -- APIs
696
697 -- TODO: find a better place for the code above, All APIs stay here
698
699 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
700 deriving (Generic, Enum, Bounded, Read, Show)
701
702 instance FromHttpApiData OrderBy
703 where
704 parseUrlPiece "TermAsc" = pure TermAsc
705 parseUrlPiece "TermDesc" = pure TermDesc
706 parseUrlPiece "ScoreAsc" = pure ScoreAsc
707 parseUrlPiece "ScoreDesc" = pure ScoreDesc
708 parseUrlPiece _ = Left "Unexpected value of OrderBy"
709
710
711 instance ToParamSchema OrderBy
712 instance FromJSON OrderBy
713 instance ToJSON OrderBy
714 instance ToSchema OrderBy
715 instance Arbitrary OrderBy
716 where
717 arbitrary = elements [minBound..maxBound]
718
719 needsScores :: Maybe OrderBy -> Bool
720 needsScores (Just ScoreAsc) = True
721 needsScores (Just ScoreDesc) = True
722 needsScores _ = False
723
724 type TableNgramsApiGet = Summary " Table Ngrams API Get"
725 :> QueryParamR "ngramsType" TabType
726 :> QueryParamR "list" ListId
727 :> QueryParamR "limit" Limit
728 :> QueryParam "offset" Offset
729 :> QueryParam "listType" ListType
730 :> QueryParam "minTermSize" MinSize
731 :> QueryParam "maxTermSize" MaxSize
732 :> QueryParam "orderBy" OrderBy
733 :> QueryParam "search" Text
734 :> Get '[JSON] (VersionedWithCount NgramsTable)
735
736 type TableNgramsApiPut = Summary " Table Ngrams API Change"
737 :> QueryParamR "ngramsType" TabType
738 :> QueryParamR "list" ListId
739 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
740 :> Put '[JSON] (Versioned NgramsTablePatch)
741
742 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
743 :> QueryParamR "ngramsType" TabType
744 :> QueryParamR "list" ListId
745 :> "recompute" :> Post '[JSON] Int
746
747 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
748 :> QueryParamR "ngramsType" TabType
749 :> QueryParamR "list" ListId
750 :> Get '[JSON] Version
751
752 type TableNgramsApi = TableNgramsApiGet
753 :<|> TableNgramsApiPut
754 :<|> RecomputeScoresNgramsApiGet
755 :<|> "version" :> TableNgramsApiGetVersion
756 :<|> TableNgramsAsyncApi
757
758 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
759 :> "async"
760 :> "charts"
761 :> "update"
762 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
763
764 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
765 => NodeId
766 -> TabType
767 -> ListId
768 -> Limit
769 -> Maybe Offset
770 -> Maybe ListType
771 -> Maybe MinSize -> Maybe MaxSize
772 -> Maybe OrderBy
773 -> Maybe Text -- full text search
774 -> m (VersionedWithCount NgramsTable)
775 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
776 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
777 where
778 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
779
780 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
781 => NodeId
782 -> TabType
783 -> ListId
784 -> m Version
785 getTableNgramsVersion _nId _tabType _listId = currentVersion
786 -- TODO: limit?
787 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
788 -- This line above looks like a waste of computation to finally get only the version.
789 -- See the comment about listNgramsChangedSince.
790
791
792 -- | Text search is deactivated for now for ngrams by doc only
793 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
794 => DocId -> TabType
795 -> ListId -> Limit -> Maybe Offset
796 -> Maybe ListType
797 -> Maybe MinSize -> Maybe MaxSize
798 -> Maybe OrderBy
799 -> Maybe Text -- full text search
800 -> m (VersionedWithCount NgramsTable)
801 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
802 ns <- selectNodesWithUsername NodeList userMaster
803 let ngramsType = ngramsTypeFromTabType tabType
804 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
805 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
806 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
807
808
809
810 apiNgramsTableCorpus :: ( GargServerC env err m
811 )
812 => NodeId -> ServerT TableNgramsApi m
813 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
814 :<|> tableNgramsPut
815 :<|> scoresRecomputeTableNgrams cId
816 :<|> getTableNgramsVersion cId
817 :<|> apiNgramsAsync cId
818
819 apiNgramsTableDoc :: ( GargServerC env err m
820 )
821 => DocId -> ServerT TableNgramsApi m
822 apiNgramsTableDoc dId = getTableNgramsDoc dId
823 :<|> tableNgramsPut
824 :<|> scoresRecomputeTableNgrams dId
825 :<|> getTableNgramsVersion dId
826 :<|> apiNgramsAsync dId
827 -- > index all the corpus accordingly (TODO AD)
828
829 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
830 apiNgramsAsync _dId =
831 serveJobsAPI $
832 JobFunction $ \i log ->
833 let
834 log' x = do
835 printDebug "tableNgramsPostChartsAsync" x
836 liftBase $ log x
837 in tableNgramsPostChartsAsync i log'
838
839 -- Did the given list of ngrams changed since the given version?
840 -- The returned value is versioned boolean value, meaning that one always retrieve the
841 -- latest version.
842 -- If the given version is negative then one simply receive the latest version and True.
843 -- Using this function is more precise than simply comparing the latest version number
844 -- with the local version number. Indeed there might be no change to this particular list
845 -- and still the version number has changed because of other lists.
846 --
847 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
848 -- * currentVersion: good computation, good bandwidth, bad precision.
849 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
850 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
851 listNgramsChangedSince :: RepoCmdM env err m
852 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
853 listNgramsChangedSince listId ngramsType version
854 | version < 0 =
855 Versioned <$> currentVersion <*> pure True
856 | otherwise =
857 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
858
859
860
861