]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[list] fixes to list CSV upload
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
90 import Data.Foldable
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
93 import Data.Monoid
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, toLower, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Job
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.NodeStory
108 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
109 import Gargantext.API.Ngrams.Tools
110 import Gargantext.Database.Action.Flow.Types
111 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
112 import Gargantext.Database.Admin.Config (userMaster)
113 import Gargantext.Database.Admin.Types.Node (NodeType(..))
114 import Gargantext.Database.Prelude (CmdCommon)
115 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
116 import Gargantext.Database.Query.Table.Node (getNode)
117 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
118 import Gargantext.Database.Query.Table.Node.Select
119 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
120 import Gargantext.Prelude hiding (log)
121 import Gargantext.Prelude.Clock (hasTime, getTime)
122 import Prelude (error)
123 import Servant hiding (Patch)
124 import Gargantext.Utils.Jobs (serveJobsAPI, jobHandleLogger)
125 import System.IO (stderr)
126 import Test.QuickCheck (elements)
127 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
128 import qualified Data.Aeson.Text as DAT
129 import qualified Data.List as List
130 import qualified Data.Map.Strict as Map
131 import qualified Data.Map.Strict.Patch as PM
132 import qualified Data.Set as S
133 import qualified Data.Set as Set
134 import qualified Gargantext.API.Metrics as Metrics
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136
137 {-
138 -- TODO sequences of modifications (Patchs)
139 type NgramsIdPatch = Patch NgramsId NgramsPatch
140
141 ngramsPatch :: Int -> NgramsPatch
142 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
143
144 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
145 toEdit n p = Edit n p
146 ngramsIdPatch :: Patch NgramsId NgramsPatch
147 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
148 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
149 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
150 ]
151
152 -- applyPatchBack :: Patch -> IO Patch
153 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
154 -}
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158
159 {-
160 -- TODO: Replace.old is ignored which means that if the current list
161 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
162 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
163 -- However this should not happen in non conflicting situations.
164 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
165 mkListsUpdate nt patches =
166 [ (ngramsTypeId nt, ng, listTypeId lt)
167 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
168 , lt <- patch ^.. patch_list . new
169 ]
170
171 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
172 -> NgramsType
173 -> NgramsTablePatch
174 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
175 mkChildrenGroups addOrRem nt patches =
176 [ (ngramsTypeId nt, parent, child)
177 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
178 , child <- patch ^.. patch_children . to addOrRem . folded
179 ]
180 -}
181
182 ------------------------------------------------------------------------
183
184 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
185 => m ()
186 saveNodeStory = do
187 saver <- view hasNodeStorySaver
188 liftBase $ do
189 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
190 saver
191 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
192
193
194 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
195 => m ()
196 saveNodeStoryImmediate = do
197 saver <- view hasNodeStoryImmediateSaver
198 liftBase $ do
199 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
200 saver
201 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
202
203 listTypeConflictResolution :: ListType -> ListType -> ListType
204 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
205
206
207 ngramsStatePatchConflictResolution
208 :: TableNgrams.NgramsType
209 -> NgramsTerm
210 -> ConflictResolutionNgramsPatch
211 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
212 = (ours, (const ours, ours), (False, False))
213 -- (False, False) mean here that Mod has always priority.
214 -- = (ours, (const ours, ours), (True, False))
215 -- (True, False) <- would mean priority to the left (same as ours).
216 -- undefined {- TODO think this through -}, listTypeConflictResolution)
217
218
219
220
221 -- Current state:
222 -- Insertions are not considered as patches,
223 -- they do not extend history,
224 -- they do not bump version.
225 insertNewOnly :: a -> Maybe b -> a
226 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
227 -- TODO error handling
228
229 {- unused
230 -- TODO refactor with putListNgrams
231 copyListNgrams :: RepoCmdM env err m
232 => NodeId -> NodeId -> NgramsType
233 -> m ()
234 copyListNgrams srcListId dstListId ngramsType = do
235 var <- view repoVar
236 liftBase $ modifyMVar_ var $
237 pure . (r_state . at ngramsType %~ (Just . f . something))
238 saveNodeStory
239 where
240 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
241 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
242
243 -- TODO refactor with putListNgrams
244 -- The list must be non-empty!
245 -- The added ngrams must be non-existent!
246 addListNgrams :: RepoCmdM env err m
247 => NodeId -> NgramsType
248 -> [NgramsElement] -> m ()
249 addListNgrams listId ngramsType nes = do
250 var <- view repoVar
251 liftBase $ modifyMVar_ var $
252 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
253 saveNodeStory
254 where
255 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
256 -}
257
258 -- | TODO: incr the Version number
259 -- && should use patch
260 -- UNSAFE
261
262 setListNgrams :: HasNodeStory env err m
263 => NodeId
264 -> TableNgrams.NgramsType
265 -> Map NgramsTerm NgramsRepoElement
266 -> m ()
267 setListNgrams listId ngramsType ns = do
268 -- printDebug "[setListNgrams]" (listId, ngramsType)
269 getter <- view hasNodeStory
270 var <- liftBase $ (getter ^. nse_getter) [listId]
271 liftBase $ modifyMVar_ var $
272 pure . ( unNodeStory
273 . at listId . _Just
274 . a_state
275 . at ngramsType
276 .~ Just ns
277 )
278 saveNodeStory
279
280
281 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
282 newNgramsFromNgramsStatePatch p =
283 [ text2ngrams (unNgramsTerm n)
284 | (n,np) <- p ^.. _PatchMap
285 -- . each . _PatchMap
286 . each . _NgramsTablePatch
287 . _PatchMap . ifolded . withIndex
288 , _ <- np ^.. patch_new . _Just
289 ]
290
291
292
293
294 commitStatePatch :: ( HasNodeStory env err m
295 , HasNodeStoryImmediateSaver env
296 , HasNodeArchiveStoryImmediateSaver env
297 , CmdCommon env )
298 => ListId
299 -> Versioned NgramsStatePatch'
300 -> m (Versioned NgramsStatePatch')
301 commitStatePatch listId (Versioned _p_version p) = do
302 -- printDebug "[commitStatePatch]" listId
303 var <- getNodeStoryVar [listId]
304 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
305 vq' <- liftBase $ modifyMVar var $ \ns -> do
306 let
307 a = ns ^. unNodeStory . at listId . _Just
308 -- apply patches from version p_version to a ^. a_version
309 -- TODO Check this
310 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
311 q = mconcat $ a ^. a_history
312
313 --printDebug "[commitStatePatch] transformWith" (p,q)
314 -- let tws s = case s of
315 -- (Mod p) -> "Mod"
316 -- _ -> "Rpl"
317 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
318
319 let
320 (p', q') = transformWith ngramsStatePatchConflictResolution p q
321 a' = a & a_version +~ 1
322 & a_state %~ act p'
323 & a_history %~ (p' :)
324
325 {-
326 -- Ideally we would like to check these properties. However:
327 -- * They should be checked only to debug the code. The client data
328 -- should be able to trigger these.
329 -- * What kind of error should they throw (we are in IO here)?
330 -- * Should we keep modifyMVar?
331 -- * Should we throw the validation in an Exception, catch it around
332 -- modifyMVar and throw it back as an Error?
333 assertValid $ transformable p q
334 assertValid $ applicable p' (r ^. r_state)
335 -}
336 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
337 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
338 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
339 , Versioned (a' ^. a_version) q'
340 )
341
342 -- NOTE Now is the only good time to save the archive history. We
343 -- have the handle to the MVar and we need to save its exact
344 -- snapshot. Node Story archive is a linear table, so it's only
345 -- couple of inserts, it shouldn't take long...
346
347 -- If we postponed saving the archive to the debounce action, we
348 -- would have issues like
349 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
350 -- where the `q` computation from above (which uses the archive)
351 -- would cause incorrect patch application (before the previous
352 -- archive was saved and applied)
353 newNs' <- archiveSaver $ fst newNs
354
355 pure (newNs', snd newNs)
356
357 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
358 saveNodeStory
359 --saveNodeStoryImmediate
360 -- Save new ngrams
361 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
362
363 pure vq'
364
365
366
367 -- This is a special case of tableNgramsPut where the input patch is empty.
368 tableNgramsPull :: HasNodeStory env err m
369 => ListId
370 -> TableNgrams.NgramsType
371 -> Version
372 -> m (Versioned NgramsTablePatch)
373 tableNgramsPull listId ngramsType p_version = do
374 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
375 var <- getNodeStoryVar [listId]
376 r <- liftBase $ readMVar var
377
378 let
379 a = r ^. unNodeStory . at listId . _Just
380 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
381 q_table = q ^. _PatchMap . at ngramsType . _Just
382
383 pure (Versioned (a ^. a_version) q_table)
384
385
386
387
388 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
389 -- Apply the given patch to the DB and returns the patch to be applied on the
390 -- client.
391 -- TODO-ACCESS check
392 tableNgramsPut :: ( HasNodeStory env err m
393 , HasNodeStoryImmediateSaver env
394 , HasNodeArchiveStoryImmediateSaver env
395 , HasInvalidError err
396 , CmdCommon env
397 )
398 => TabType
399 -> ListId
400 -> Versioned NgramsTablePatch
401 -> m (Versioned NgramsTablePatch)
402 tableNgramsPut tabType listId (Versioned p_version p_table)
403 | p_table == mempty = do
404 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
405 let ngramsType = ngramsTypeFromTabType tabType
406 tableNgramsPull listId ngramsType p_version
407
408 | otherwise = do
409 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
410 let ngramsType = ngramsTypeFromTabType tabType
411 (p, p_validity) = PM.singleton ngramsType p_table
412
413 assertValid p_validity
414
415 ret <- commitStatePatch listId (Versioned p_version p)
416 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
417
418 pure ret
419
420
421
422 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
423 , FlowCmdM env err m
424 , HasNodeError err
425 , HasSettings env
426 )
427 => UpdateTableNgramsCharts
428 -> (JobLog -> m ())
429 -> m JobLog
430 tableNgramsPostChartsAsync utn logStatus = do
431 let tabType = utn ^. utn_tab_type
432 let listId = utn ^. utn_list_id
433
434 node <- getNode listId
435 let _nId = node ^. node_id
436 _uId = node ^. node_user_id
437 mCId = node ^. node_parent_id
438
439 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
440 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
441
442 case mCId of
443 Nothing -> do
444 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
445 pure $ jobLogFail $ jobLogInit 1
446 Just cId -> do
447 case tabType of
448 Authors -> do
449 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
450 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
451 logRef
452 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
453 logRefSuccess
454
455 getRef
456 Institutes -> do
457 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
458 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
459 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
460 logRef
461 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
462 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
463 logRefSuccess
464 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
465 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
466 logRefSuccess
467 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
468 logRefSuccess
469
470 getRef
471 Sources -> do
472 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
473 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
474 logRef
475 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
476 logRefSuccess
477
478 getRef
479 Terms -> do
480 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
481 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
482 logRef
483 {-
484 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
485 logRefSuccess
486 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
487 logRefSuccess
488 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
489 logRefSuccess
490 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
491 logRefSuccess
492 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
493 logRefSuccess
494 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
495 -}
496 logRefSuccess
497
498 getRef
499 _ -> do
500 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
501 pure $ jobLogFail $ jobLogInit 1
502
503 {-
504 { _ne_list :: ListType
505 If we merge the parents/children we can potentially create cycles!
506 , _ne_parent :: Maybe NgramsTerm
507 , _ne_children :: MSet NgramsTerm
508 }
509 -}
510
511 getNgramsTableMap :: HasNodeStory env err m
512 => NodeId
513 -> TableNgrams.NgramsType
514 -> m (Versioned NgramsTableMap)
515 getNgramsTableMap nodeId ngramsType = do
516 v <- getNodeStoryVar [nodeId]
517 repo <- liftBase $ readMVar v
518 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
519 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
520
521
522 dumpJsonTableMap :: HasNodeStory env err m
523 => Text
524 -> NodeId
525 -> TableNgrams.NgramsType
526 -> m ()
527 dumpJsonTableMap fpath nodeId ngramsType = do
528 m <- getNgramsTableMap nodeId ngramsType
529 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
530 pure ()
531
532
533 type MinSize = Int
534 type MaxSize = Int
535
536 -- | TODO Errors management
537 -- TODO: polymorphic for Annuaire or Corpus or ...
538 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
539 -- TODO: should take only one ListId
540
541
542 getTableNgrams :: forall env err m.
543 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
544 => NodeType -> NodeId -> TabType
545 -> ListId -> Limit -> Maybe Offset
546 -> Maybe ListType
547 -> Maybe MinSize -> Maybe MaxSize
548 -> Maybe OrderBy
549 -> (NgramsTerm -> Bool)
550 -> m (VersionedWithCount NgramsTable)
551 getTableNgrams _nType nId tabType listId limit_ offset
552 listType minSize maxSize orderBy searchQuery = do
553
554 t0 <- getTime
555 -- lIds <- selectNodesWithUsername NodeList userMaster
556 let
557 ngramsType = ngramsTypeFromTabType tabType
558 offset' = maybe 0 identity offset
559 listType' = maybe (const True) (==) listType
560 minSize' = maybe (const True) (<=) minSize
561 maxSize' = maybe (const True) (>=) maxSize
562
563 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
564 (tableMap ^. at r)
565 )
566 (ne ^. ne_root)
567
568 selected_node n = minSize' s
569 && maxSize' s
570 && searchQuery (n ^. ne_ngrams)
571 && listType' (n ^. ne_list)
572 where
573 s = n ^. ne_size
574
575 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
576
577 ---------------------------------------
578 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
579 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
580 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
581 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to List.nub . to length)
582 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to List.nub . to length)
583
584 ---------------------------------------
585 -- | Filter the given `tableMap` with the search criteria.
586 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
587 filteredNodes tableMap = roots
588 where
589 list = tableMap ^.. each
590 selected_nodes = list & filter selected_node
591 roots = rootOf tableMap <$> selected_nodes
592
593 -- | Appends subitems (selected from `tableMap`) for given `roots`.
594 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
595 withInners tableMap roots = roots <> inners
596 where
597 list = tableMap ^.. each
598 rootSet = Set.fromList (_ne_ngrams <$> roots)
599 inners = list & filter (selected_inner rootSet)
600
601 -- | Paginate the results
602 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
603 sortAndPaginate = take limit_
604 . drop offset'
605 . sortOnOrder orderBy
606
607 ---------------------------------------
608
609 let scoresNeeded = needsScores orderBy
610 t1 <- getTime
611
612 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
613
614 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
615
616 let fltrCount = length $ fltr ^. v_data . _NgramsTable
617
618 t2 <- getTime
619 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
620 t3 <- getTime
621 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
622 liftBase $ do
623 hprint stderr
624 ("getTableNgrams total=" % hasTime
625 % " map1=" % hasTime
626 % " map2=" % hasTime
627 % " map3=" % hasTime
628 % " sql=" % (if scoresNeeded then "map2" else "map3")
629 % "\n"
630 ) t0 t3 t0 t1 t1 t2 t2 t3
631
632 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
633 pure $ toVersionedWithCount fltrCount tableMapSorted
634
635
636 -- | Helper function to get the ngrams table with scores.
637 getNgramsTable' :: forall env err m.
638 ( HasNodeStory env err m
639 , HasNodeError err
640 , CmdCommon env)
641 => NodeId
642 -> ListId
643 -> TableNgrams.NgramsType
644 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
645 getNgramsTable' nId listId ngramsType = do
646 tableMap <- getNgramsTableMap listId ngramsType
647 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
648 . Map.mapWithKey ngramsElementFromRepo
649
650 -- | Helper function to set scores on an `NgramsTable`.
651 setNgramsTableScores :: forall env err m t.
652 ( Each t t NgramsElement NgramsElement
653 , HasNodeStory env err m
654 , HasNodeError err
655 , CmdCommon env )
656 => NodeId
657 -> ListId
658 -> TableNgrams.NgramsType
659 -> t
660 -> m t
661 setNgramsTableScores nId listId ngramsType table = do
662 t1 <- getTime
663 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
664 --printDebug "[setNgramsTableScores] occurrences" occurrences
665 t2 <- getTime
666 liftBase $ do
667 let ngrams_terms = table ^.. each . ne_ngrams
668 -- printDebug "ngrams_terms" ngrams_terms
669 hprint stderr
670 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
671 (length ngrams_terms) t1 t2
672 let
673 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
674
675 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
676
677 pure $ table & each %~ setOcc
678
679
680
681
682 scoresRecomputeTableNgrams :: forall env err m.
683 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
684 => NodeId -> TabType -> ListId -> m Int
685 scoresRecomputeTableNgrams nId tabType listId = do
686 tableMap <- getNgramsTableMap listId ngramsType
687 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
688 . Map.mapWithKey ngramsElementFromRepo
689
690 pure $ 1
691 where
692 ngramsType = ngramsTypeFromTabType tabType
693
694
695 -- APIs
696
697 -- TODO: find a better place for the code above, All APIs stay here
698
699 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
700 deriving (Generic, Enum, Bounded, Read, Show)
701
702 instance FromHttpApiData OrderBy
703 where
704 parseUrlPiece "TermAsc" = pure TermAsc
705 parseUrlPiece "TermDesc" = pure TermDesc
706 parseUrlPiece "ScoreAsc" = pure ScoreAsc
707 parseUrlPiece "ScoreDesc" = pure ScoreDesc
708 parseUrlPiece _ = Left "Unexpected value of OrderBy"
709
710 instance ToHttpApiData OrderBy where
711 toUrlPiece = pack . show
712
713 instance ToParamSchema OrderBy
714 instance FromJSON OrderBy
715 instance ToJSON OrderBy
716 instance ToSchema OrderBy
717 instance Arbitrary OrderBy
718 where
719 arbitrary = elements [minBound..maxBound]
720
721 needsScores :: Maybe OrderBy -> Bool
722 needsScores (Just ScoreAsc) = True
723 needsScores (Just ScoreDesc) = True
724 needsScores _ = False
725
726 type TableNgramsApiGet = Summary " Table Ngrams API Get"
727 :> QueryParamR "ngramsType" TabType
728 :> QueryParamR "list" ListId
729 :> QueryParamR "limit" Limit
730 :> QueryParam "offset" Offset
731 :> QueryParam "listType" ListType
732 :> QueryParam "minTermSize" MinSize
733 :> QueryParam "maxTermSize" MaxSize
734 :> QueryParam "orderBy" OrderBy
735 :> QueryParam "search" Text
736 :> Get '[JSON] (VersionedWithCount NgramsTable)
737
738 type TableNgramsApiPut = Summary " Table Ngrams API Change"
739 :> QueryParamR "ngramsType" TabType
740 :> QueryParamR "list" ListId
741 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
742 :> Put '[JSON] (Versioned NgramsTablePatch)
743
744 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
745 :> QueryParamR "ngramsType" TabType
746 :> QueryParamR "list" ListId
747 :> "recompute" :> Post '[JSON] Int
748
749 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
750 :> QueryParamR "ngramsType" TabType
751 :> QueryParamR "list" ListId
752 :> Get '[JSON] Version
753
754 type TableNgramsApi = TableNgramsApiGet
755 :<|> TableNgramsApiPut
756 :<|> RecomputeScoresNgramsApiGet
757 :<|> "version" :> TableNgramsApiGetVersion
758 :<|> TableNgramsAsyncApi
759
760 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
761 :> "async"
762 :> "charts"
763 :> "update"
764 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
765
766 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
767 => NodeId
768 -> TabType
769 -> ListId
770 -> Limit
771 -> Maybe Offset
772 -> Maybe ListType
773 -> Maybe MinSize -> Maybe MaxSize
774 -> Maybe OrderBy
775 -> Maybe Text -- full text search
776 -> m (VersionedWithCount NgramsTable)
777 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
778 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
779 where
780 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
781
782
783
784 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
785 => NodeId
786 -> TabType
787 -> ListId
788 -> m Version
789 getTableNgramsVersion _nId _tabType listId = currentVersion listId
790
791
792
793 -- TODO: limit?
794 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
795 -- This line above looks like a waste of computation to finally get only the version.
796 -- See the comment about listNgramsChangedSince.
797
798
799 -- | Text search is deactivated for now for ngrams by doc only
800 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
801 => DocId -> TabType
802 -> ListId -> Limit -> Maybe Offset
803 -> Maybe ListType
804 -> Maybe MinSize -> Maybe MaxSize
805 -> Maybe OrderBy
806 -> Maybe Text -- full text search
807 -> m (VersionedWithCount NgramsTable)
808 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
809 ns <- selectNodesWithUsername NodeList userMaster
810 let ngramsType = ngramsTypeFromTabType tabType
811 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
812 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
813 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
814
815
816
817 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
818 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
819 :<|> tableNgramsPut
820 :<|> scoresRecomputeTableNgrams cId
821 :<|> getTableNgramsVersion cId
822 :<|> apiNgramsAsync cId
823
824 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
825 apiNgramsTableDoc dId = getTableNgramsDoc dId
826 :<|> tableNgramsPut
827 :<|> scoresRecomputeTableNgrams dId
828 :<|> getTableNgramsVersion dId
829 :<|> apiNgramsAsync dId
830
831 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
832 apiNgramsAsync _dId =
833 serveJobsAPI TableNgramsJob $ \jHandle i ->
834 let
835 log' x = do
836 printDebug "tableNgramsPostChartsAsync" x
837 jobHandleLogger jHandle x
838 in tableNgramsPostChartsAsync i log'
839
840 -- Did the given list of ngrams changed since the given version?
841 -- The returned value is versioned boolean value, meaning that one always retrieve the
842 -- latest version.
843 -- If the given version is negative then one simply receive the latest version and True.
844 -- Using this function is more precise than simply comparing the latest version number
845 -- with the local version number. Indeed there might be no change to this particular list
846 -- and still the version number has changed because of other lists.
847 --
848 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
849 -- * currentVersion: good computation, good bandwidth, bad precision.
850 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
851 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
852 listNgramsChangedSince :: HasNodeStory env err m
853 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
854 listNgramsChangedSince listId ngramsType version
855 | version < 0 =
856 Versioned <$> currentVersion listId <*> pure True
857 | otherwise =
858 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)