]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Update README.md
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
90 import Data.Foldable
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
93 import Data.Monoid
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, toLower, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Job
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Mail.Types (HasMail)
109 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
110 import Gargantext.API.Ngrams.Tools
111 import Gargantext.Database.Action.Flow.Types
112 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
113 import Gargantext.Database.Admin.Config (userMaster)
114 import Gargantext.Database.Admin.Types.Node (NodeType(..))
115 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
116 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
117 import Gargantext.Database.Query.Table.Node (getNode)
118 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
119 import Gargantext.Database.Query.Table.Node.Select
120 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
121 import Gargantext.Prelude hiding (log)
122 import Gargantext.Prelude.Clock (hasTime, getTime)
123 import Prelude (error)
124 import Servant hiding (Patch)
125 import Gargantext.Utils.Jobs (serveJobsAPI)
126 import System.IO (stderr)
127 import Test.QuickCheck (elements)
128 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
129 import qualified Data.Aeson.Text as DAT
130 import qualified Data.List as List
131 import qualified Data.Map.Strict as Map
132 import qualified Data.Map.Strict.Patch as PM
133 import qualified Data.Set as S
134 import qualified Data.Set as Set
135 import qualified Gargantext.API.Metrics as Metrics
136 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
137
138 {-
139 -- TODO sequences of modifications (Patchs)
140 type NgramsIdPatch = Patch NgramsId NgramsPatch
141
142 ngramsPatch :: Int -> NgramsPatch
143 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
144
145 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
146 toEdit n p = Edit n p
147 ngramsIdPatch :: Patch NgramsId NgramsPatch
148 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
149 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
150 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
151 ]
152
153 -- applyPatchBack :: Patch -> IO Patch
154 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
155 -}
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158 ------------------------------------------------------------------------
159
160 {-
161 -- TODO: Replace.old is ignored which means that if the current list
162 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
163 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
164 -- However this should not happen in non conflicting situations.
165 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
166 mkListsUpdate nt patches =
167 [ (ngramsTypeId nt, ng, listTypeId lt)
168 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
169 , lt <- patch ^.. patch_list . new
170 ]
171
172 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
173 -> NgramsType
174 -> NgramsTablePatch
175 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
176 mkChildrenGroups addOrRem nt patches =
177 [ (ngramsTypeId nt, parent, child)
178 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
179 , child <- patch ^.. patch_children . to addOrRem . folded
180 ]
181 -}
182
183 ------------------------------------------------------------------------
184
185 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
186 => m ()
187 saveNodeStory = do
188 saver <- view hasNodeStorySaver
189 liftBase $ do
190 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
191 saver
192 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
193
194
195 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
196 => m ()
197 saveNodeStoryImmediate = do
198 saver <- view hasNodeStoryImmediateSaver
199 liftBase $ do
200 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
201 saver
202 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
203
204 listTypeConflictResolution :: ListType -> ListType -> ListType
205 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
206
207
208 ngramsStatePatchConflictResolution
209 :: TableNgrams.NgramsType
210 -> NgramsTerm
211 -> ConflictResolutionNgramsPatch
212 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
213 = (ours, (const ours, ours), (False, False))
214 -- (False, False) mean here that Mod has always priority.
215 -- = (ours, (const ours, ours), (True, False))
216 -- (True, False) <- would mean priority to the left (same as ours).
217 -- undefined {- TODO think this through -}, listTypeConflictResolution)
218
219
220
221
222 -- Current state:
223 -- Insertions are not considered as patches,
224 -- they do not extend history,
225 -- they do not bump version.
226 insertNewOnly :: a -> Maybe b -> a
227 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
228 -- TODO error handling
229
230 {- unused
231 -- TODO refactor with putListNgrams
232 copyListNgrams :: RepoCmdM env err m
233 => NodeId -> NodeId -> NgramsType
234 -> m ()
235 copyListNgrams srcListId dstListId ngramsType = do
236 var <- view repoVar
237 liftBase $ modifyMVar_ var $
238 pure . (r_state . at ngramsType %~ (Just . f . something))
239 saveNodeStory
240 where
241 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
242 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
243
244 -- TODO refactor with putListNgrams
245 -- The list must be non-empty!
246 -- The added ngrams must be non-existent!
247 addListNgrams :: RepoCmdM env err m
248 => NodeId -> NgramsType
249 -> [NgramsElement] -> m ()
250 addListNgrams listId ngramsType nes = do
251 var <- view repoVar
252 liftBase $ modifyMVar_ var $
253 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
254 saveNodeStory
255 where
256 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
257 -}
258
259 -- | TODO: incr the Version number
260 -- && should use patch
261 -- UNSAFE
262
263 setListNgrams :: HasNodeStory env err m
264 => NodeId
265 -> TableNgrams.NgramsType
266 -> Map NgramsTerm NgramsRepoElement
267 -> m ()
268 setListNgrams listId ngramsType ns = do
269 -- printDebug "[setListNgrams]" (listId, ngramsType)
270 getter <- view hasNodeStory
271 var <- liftBase $ (getter ^. nse_getter) [listId]
272 liftBase $ modifyMVar_ var $
273 pure . ( unNodeStory
274 . at listId . _Just
275 . a_state
276 . at ngramsType
277 .~ Just ns
278 )
279 saveNodeStory
280
281
282 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
283 newNgramsFromNgramsStatePatch p =
284 [ text2ngrams (unNgramsTerm n)
285 | (n,np) <- p ^.. _PatchMap
286 -- . each . _PatchMap
287 . each . _NgramsTablePatch
288 . _PatchMap . ifolded . withIndex
289 , _ <- np ^.. patch_new . _Just
290 ]
291
292
293
294
295 commitStatePatch :: ( HasNodeStory env err m
296 , HasNodeStoryImmediateSaver env
297 , HasNodeArchiveStoryImmediateSaver env
298 , HasMail env)
299 => ListId
300 -> Versioned NgramsStatePatch'
301 -> m (Versioned NgramsStatePatch')
302 commitStatePatch listId (Versioned _p_version p) = do
303 -- printDebug "[commitStatePatch]" listId
304 var <- getNodeStoryVar [listId]
305 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
306 vq' <- liftBase $ modifyMVar var $ \ns -> do
307 let
308 a = ns ^. unNodeStory . at listId . _Just
309 -- apply patches from version p_version to a ^. a_version
310 -- TODO Check this
311 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
312 q = mconcat $ a ^. a_history
313
314 --printDebug "[commitStatePatch] transformWith" (p,q)
315 -- let tws s = case s of
316 -- (Mod p) -> "Mod"
317 -- _ -> "Rpl"
318 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
319
320 let
321 (p', q') = transformWith ngramsStatePatchConflictResolution p q
322 a' = a & a_version +~ 1
323 & a_state %~ act p'
324 & a_history %~ (p' :)
325
326 {-
327 -- Ideally we would like to check these properties. However:
328 -- * They should be checked only to debug the code. The client data
329 -- should be able to trigger these.
330 -- * What kind of error should they throw (we are in IO here)?
331 -- * Should we keep modifyMVar?
332 -- * Should we throw the validation in an Exception, catch it around
333 -- modifyMVar and throw it back as an Error?
334 assertValid $ transformable p q
335 assertValid $ applicable p' (r ^. r_state)
336 -}
337 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
338 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
339 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
340 , Versioned (a' ^. a_version) q'
341 )
342
343 -- NOTE Now is the only good time to save the archive history. We
344 -- have the handle to the MVar and we need to save its exact
345 -- snapshot. Node Story archive is a linear table, so it's only
346 -- couple of inserts, it shouldn't take long...
347
348 -- If we postponed saving the archive to the debounce action, we
349 -- would have issues like
350 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
351 -- where the `q` computation from above (which uses the archive)
352 -- would cause incorrect patch application (before the previous
353 -- archive was saved and applied)
354 newNs' <- archiveSaver $ fst newNs
355
356 pure (newNs', snd newNs)
357
358 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
359 saveNodeStory
360 --saveNodeStoryImmediate
361 -- Save new ngrams
362 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
363
364 pure vq'
365
366
367
368 -- This is a special case of tableNgramsPut where the input patch is empty.
369 tableNgramsPull :: HasNodeStory env err m
370 => ListId
371 -> TableNgrams.NgramsType
372 -> Version
373 -> m (Versioned NgramsTablePatch)
374 tableNgramsPull listId ngramsType p_version = do
375 printDebug "[tableNgramsPull]" (listId, ngramsType)
376 var <- getNodeStoryVar [listId]
377 r <- liftBase $ readMVar var
378
379 let
380 a = r ^. unNodeStory . at listId . _Just
381 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
382 q_table = q ^. _PatchMap . at ngramsType . _Just
383
384 pure (Versioned (a ^. a_version) q_table)
385
386
387
388
389 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
390 -- Apply the given patch to the DB and returns the patch to be applied on the
391 -- client.
392 -- TODO-ACCESS check
393 tableNgramsPut :: ( HasNodeStory env err m
394 , HasNodeStoryImmediateSaver env
395 , HasNodeArchiveStoryImmediateSaver env
396 , HasInvalidError err
397 , HasSettings env
398 , HasMail env
399 )
400 => TabType
401 -> ListId
402 -> Versioned NgramsTablePatch
403 -> m (Versioned NgramsTablePatch)
404 tableNgramsPut tabType listId (Versioned p_version p_table)
405 | p_table == mempty = do
406 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
407 let ngramsType = ngramsTypeFromTabType tabType
408 tableNgramsPull listId ngramsType p_version
409
410 | otherwise = do
411 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
412 let ngramsType = ngramsTypeFromTabType tabType
413 (p, p_validity) = PM.singleton ngramsType p_table
414
415 assertValid p_validity
416
417 ret <- commitStatePatch listId (Versioned p_version p)
418 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
419
420 pure ret
421
422
423
424 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
425 , FlowCmdM env err m
426 , HasNodeError err
427 , HasSettings env
428 )
429 => UpdateTableNgramsCharts
430 -> (JobLog -> m ())
431 -> m JobLog
432 tableNgramsPostChartsAsync utn logStatus = do
433 let tabType = utn ^. utn_tab_type
434 let listId = utn ^. utn_list_id
435
436 node <- getNode listId
437 let nId = node ^. node_id
438 _uId = node ^. node_user_id
439 mCId = node ^. node_parent_id
440
441 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
442 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
443
444 case mCId of
445 Nothing -> do
446 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
447 pure $ jobLogFail $ jobLogInit 1
448 Just cId -> do
449 case tabType of
450 Authors -> do
451 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
452 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
453 logRef
454 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
455 logRefSuccess
456
457 getRef
458 Institutes -> do
459 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
460 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
461 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
462 logRef
463 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
464 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
465 logRefSuccess
466 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
467 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
468 logRefSuccess
469 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
470 logRefSuccess
471
472 getRef
473 Sources -> do
474 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
475 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
476 logRef
477 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
478 logRefSuccess
479
480 getRef
481 Terms -> do
482 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
483 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
484 logRef
485 {-
486 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
487 logRefSuccess
488 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
489 logRefSuccess
490 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
491 logRefSuccess
492 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
493 logRefSuccess
494 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
495 logRefSuccess
496 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
497 -}
498 logRefSuccess
499
500 getRef
501 _ -> do
502 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
503 pure $ jobLogFail $ jobLogInit 1
504
505 {-
506 { _ne_list :: ListType
507 If we merge the parents/children we can potentially create cycles!
508 , _ne_parent :: Maybe NgramsTerm
509 , _ne_children :: MSet NgramsTerm
510 }
511 -}
512
513 getNgramsTableMap :: HasNodeStory env err m
514 => NodeId
515 -> TableNgrams.NgramsType
516 -> m (Versioned NgramsTableMap)
517 getNgramsTableMap nodeId ngramsType = do
518 v <- getNodeStoryVar [nodeId]
519 repo <- liftBase $ readMVar v
520 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
521 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
522
523
524 dumpJsonTableMap :: HasNodeStory env err m
525 => Text
526 -> NodeId
527 -> TableNgrams.NgramsType
528 -> m ()
529 dumpJsonTableMap fpath nodeId ngramsType = do
530 m <- getNgramsTableMap nodeId ngramsType
531 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
532 pure ()
533
534
535 type MinSize = Int
536 type MaxSize = Int
537
538 -- | TODO Errors management
539 -- TODO: polymorphic for Annuaire or Corpus or ...
540 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
541 -- TODO: should take only one ListId
542
543
544 getTableNgrams :: forall env err m.
545 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
546 => NodeType -> NodeId -> TabType
547 -> ListId -> Limit -> Maybe Offset
548 -> Maybe ListType
549 -> Maybe MinSize -> Maybe MaxSize
550 -> Maybe OrderBy
551 -> (NgramsTerm -> Bool)
552 -> m (VersionedWithCount NgramsTable)
553 getTableNgrams _nType nId tabType listId limit_ offset
554 listType minSize maxSize orderBy searchQuery = do
555
556 t0 <- getTime
557 -- lIds <- selectNodesWithUsername NodeList userMaster
558 let
559 ngramsType = ngramsTypeFromTabType tabType
560 offset' = maybe 0 identity offset
561 listType' = maybe (const True) (==) listType
562 minSize' = maybe (const True) (<=) minSize
563 maxSize' = maybe (const True) (>=) maxSize
564
565 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
566 (tableMap ^. at r)
567 )
568 (ne ^. ne_root)
569
570 selected_node n = minSize' s
571 && maxSize' s
572 && searchQuery (n ^. ne_ngrams)
573 && listType' (n ^. ne_list)
574 where
575 s = n ^. ne_size
576
577 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
578
579 ---------------------------------------
580 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
581 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
582 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
583 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to List.nub . to length)
584 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to List.nub . to length)
585
586 ---------------------------------------
587 -- | Filter the given `tableMap` with the search criteria.
588 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
589 filteredNodes tableMap = roots
590 where
591 list = tableMap ^.. each
592 selected_nodes = list & filter selected_node
593 roots = rootOf tableMap <$> selected_nodes
594
595 -- | Appends subitems (selected from `tableMap`) for given `roots`.
596 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
597 withInners tableMap roots = roots <> inners
598 where
599 list = tableMap ^.. each
600 rootSet = Set.fromList (_ne_ngrams <$> roots)
601 inners = list & filter (selected_inner rootSet)
602
603 -- | Paginate the results
604 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
605 sortAndPaginate = take limit_
606 . drop offset'
607 . sortOnOrder orderBy
608
609 ---------------------------------------
610
611 let scoresNeeded = needsScores orderBy
612 t1 <- getTime
613
614 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
615
616 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
617
618 let fltrCount = length $ fltr ^. v_data . _NgramsTable
619
620 t2 <- getTime
621 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
622 t3 <- getTime
623 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
624 liftBase $ do
625 hprint stderr
626 ("getTableNgrams total=" % hasTime
627 % " map1=" % hasTime
628 % " map2=" % hasTime
629 % " map3=" % hasTime
630 % " sql=" % (if scoresNeeded then "map2" else "map3")
631 % "\n"
632 ) t0 t3 t0 t1 t1 t2 t2 t3
633
634 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
635 pure $ toVersionedWithCount fltrCount tableMapSorted
636
637
638 -- | Helper function to get the ngrams table with scores.
639 getNgramsTable' :: forall env err m.
640 ( HasNodeStory env err m
641 , HasNodeError err
642 , HasConnectionPool env
643 , HasConfig env
644 , HasMail env)
645 => NodeId
646 -> ListId
647 -> TableNgrams.NgramsType
648 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
649 getNgramsTable' nId listId ngramsType = do
650 tableMap <- getNgramsTableMap listId ngramsType
651 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
652 . Map.mapWithKey ngramsElementFromRepo
653
654 -- | Helper function to set scores on an `NgramsTable`.
655 setNgramsTableScores :: forall env err m t.
656 ( Each t t NgramsElement NgramsElement
657 , HasNodeStory env err m
658 , HasNodeError err
659 , HasConnectionPool env
660 , HasConfig env
661 , HasMail env)
662 => NodeId
663 -> ListId
664 -> TableNgrams.NgramsType
665 -> t
666 -> m t
667 setNgramsTableScores nId listId ngramsType table = do
668 t1 <- getTime
669 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
670 --printDebug "[setNgramsTableScores] occurrences" occurrences
671 t2 <- getTime
672 liftBase $ do
673 let ngrams_terms = table ^.. each . ne_ngrams
674 -- printDebug "ngrams_terms" ngrams_terms
675 hprint stderr
676 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
677 (length ngrams_terms) t1 t2
678 let
679 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
680
681 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
682
683 pure $ table & each %~ setOcc
684
685
686
687
688 scoresRecomputeTableNgrams :: forall env err m.
689 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
690 => NodeId -> TabType -> ListId -> m Int
691 scoresRecomputeTableNgrams nId tabType listId = do
692 tableMap <- getNgramsTableMap listId ngramsType
693 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
694 . Map.mapWithKey ngramsElementFromRepo
695
696 pure $ 1
697 where
698 ngramsType = ngramsTypeFromTabType tabType
699
700
701 -- APIs
702
703 -- TODO: find a better place for the code above, All APIs stay here
704
705 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
706 deriving (Generic, Enum, Bounded, Read, Show)
707
708 instance FromHttpApiData OrderBy
709 where
710 parseUrlPiece "TermAsc" = pure TermAsc
711 parseUrlPiece "TermDesc" = pure TermDesc
712 parseUrlPiece "ScoreAsc" = pure ScoreAsc
713 parseUrlPiece "ScoreDesc" = pure ScoreDesc
714 parseUrlPiece _ = Left "Unexpected value of OrderBy"
715
716 instance ToHttpApiData OrderBy where
717 toUrlPiece = pack . show
718
719 instance ToParamSchema OrderBy
720 instance FromJSON OrderBy
721 instance ToJSON OrderBy
722 instance ToSchema OrderBy
723 instance Arbitrary OrderBy
724 where
725 arbitrary = elements [minBound..maxBound]
726
727 needsScores :: Maybe OrderBy -> Bool
728 needsScores (Just ScoreAsc) = True
729 needsScores (Just ScoreDesc) = True
730 needsScores _ = False
731
732 type TableNgramsApiGet = Summary " Table Ngrams API Get"
733 :> QueryParamR "ngramsType" TabType
734 :> QueryParamR "list" ListId
735 :> QueryParamR "limit" Limit
736 :> QueryParam "offset" Offset
737 :> QueryParam "listType" ListType
738 :> QueryParam "minTermSize" MinSize
739 :> QueryParam "maxTermSize" MaxSize
740 :> QueryParam "orderBy" OrderBy
741 :> QueryParam "search" Text
742 :> Get '[JSON] (VersionedWithCount NgramsTable)
743
744 type TableNgramsApiPut = Summary " Table Ngrams API Change"
745 :> QueryParamR "ngramsType" TabType
746 :> QueryParamR "list" ListId
747 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
748 :> Put '[JSON] (Versioned NgramsTablePatch)
749
750 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
751 :> QueryParamR "ngramsType" TabType
752 :> QueryParamR "list" ListId
753 :> "recompute" :> Post '[JSON] Int
754
755 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
756 :> QueryParamR "ngramsType" TabType
757 :> QueryParamR "list" ListId
758 :> Get '[JSON] Version
759
760 type TableNgramsApi = TableNgramsApiGet
761 :<|> TableNgramsApiPut
762 :<|> RecomputeScoresNgramsApiGet
763 :<|> "version" :> TableNgramsApiGetVersion
764 :<|> TableNgramsAsyncApi
765
766 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
767 :> "async"
768 :> "charts"
769 :> "update"
770 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
771
772 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
773 => NodeId
774 -> TabType
775 -> ListId
776 -> Limit
777 -> Maybe Offset
778 -> Maybe ListType
779 -> Maybe MinSize -> Maybe MaxSize
780 -> Maybe OrderBy
781 -> Maybe Text -- full text search
782 -> m (VersionedWithCount NgramsTable)
783 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
784 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
785 where
786 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
787
788
789
790 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
791 => NodeId
792 -> TabType
793 -> ListId
794 -> m Version
795 getTableNgramsVersion _nId _tabType listId = currentVersion listId
796
797
798
799 -- TODO: limit?
800 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
801 -- This line above looks like a waste of computation to finally get only the version.
802 -- See the comment about listNgramsChangedSince.
803
804
805 -- | Text search is deactivated for now for ngrams by doc only
806 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
807 => DocId -> TabType
808 -> ListId -> Limit -> Maybe Offset
809 -> Maybe ListType
810 -> Maybe MinSize -> Maybe MaxSize
811 -> Maybe OrderBy
812 -> Maybe Text -- full text search
813 -> m (VersionedWithCount NgramsTable)
814 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
815 ns <- selectNodesWithUsername NodeList userMaster
816 let ngramsType = ngramsTypeFromTabType tabType
817 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
818 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
819 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
820
821
822
823 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
824 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
825 :<|> tableNgramsPut
826 :<|> scoresRecomputeTableNgrams cId
827 :<|> getTableNgramsVersion cId
828 :<|> apiNgramsAsync cId
829
830 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
831 apiNgramsTableDoc dId = getTableNgramsDoc dId
832 :<|> tableNgramsPut
833 :<|> scoresRecomputeTableNgrams dId
834 :<|> getTableNgramsVersion dId
835 :<|> apiNgramsAsync dId
836
837 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
838 apiNgramsAsync _dId =
839 serveJobsAPI TableNgramsJob $ \i log ->
840 let
841 log' x = do
842 printDebug "tableNgramsPostChartsAsync" x
843 liftBase $ log x
844 in tableNgramsPostChartsAsync i log'
845
846 -- Did the given list of ngrams changed since the given version?
847 -- The returned value is versioned boolean value, meaning that one always retrieve the
848 -- latest version.
849 -- If the given version is negative then one simply receive the latest version and True.
850 -- Using this function is more precise than simply comparing the latest version number
851 -- with the local version number. Indeed there might be no change to this particular list
852 -- and still the version number has changed because of other lists.
853 --
854 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
855 -- * currentVersion: good computation, good bandwidth, bad precision.
856 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
857 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
858 listNgramsChangedSince :: HasNodeStory env err m
859 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
860 listNgramsChangedSince listId ngramsType version
861 | version < 0 =
862 Versioned <$> currentVersion listId <*> pure True
863 | otherwise =
864 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)