]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[frame write] implement flowDataText to insert docs into corpus
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , QueryParamR
67 , TODO
68
69 -- Internals
70 , getNgramsTableMap
71 , dumpJsonTableMap
72 , tableNgramsPull
73 , tableNgramsPut
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 )
81 where
82
83 import Control.Concurrent
84 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
85 import Control.Monad.Reader
86 import Data.Aeson hiding ((.=))
87 import Data.Either (Either(..))
88 import Data.Foldable
89 import Data.Map.Strict (Map)
90 import Data.Maybe (fromMaybe)
91 import Data.Monoid
92 import Data.Ord (Down(..))
93 import Data.Patch.Class (Action(act), Transformable(..), ours)
94 import Data.Swagger hiding (version, patch)
95 import Data.Text (Text, isInfixOf, unpack)
96 import Data.Text.Lazy.IO as DTL
97 import Formatting (hprint, int, (%))
98 import GHC.Generics (Generic)
99 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
100 import Gargantext.API.Admin.Types (HasSettings)
101 import Gargantext.API.Job
102 import Gargantext.API.Ngrams.Types
103 import Gargantext.API.Prelude
104 import Gargantext.Core.NodeStory
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133
134 {-
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
137
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
140
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 ]
148
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
151 -}
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
155
156 {-
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
166 ]
167
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> NgramsType
170 -> NgramsTablePatch
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
176 ]
177 -}
178
179 ------------------------------------------------------------------------
180
181 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 => m ()
183 saveNodeStory = liftBase =<< view hasNodeStorySaver
184
185
186 listTypeConflictResolution :: ListType -> ListType -> ListType
187 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
188
189
190 ngramsStatePatchConflictResolution
191 :: TableNgrams.NgramsType
192 -> NgramsTerm
193 -> ConflictResolutionNgramsPatch
194 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
195 = (ours, (const ours, ours), (False, False))
196 -- (False, False) mean here that Mod has always priority.
197 -- (True, False) <- would mean priority to the left (same as ours).
198 -- undefined {- TODO think this through -}, listTypeConflictResolution)
199
200
201
202
203 -- Current state:
204 -- Insertions are not considered as patches,
205 -- they do not extend history,
206 -- they do not bump version.
207 insertNewOnly :: a -> Maybe b -> a
208 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
209 -- TODO error handling
210
211 {- unused
212 -- TODO refactor with putListNgrams
213 copyListNgrams :: RepoCmdM env err m
214 => NodeId -> NodeId -> NgramsType
215 -> m ()
216 copyListNgrams srcListId dstListId ngramsType = do
217 var <- view repoVar
218 liftBase $ modifyMVar_ var $
219 pure . (r_state . at ngramsType %~ (Just . f . something))
220 saveNodeStory
221 where
222 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
223 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
224
225 -- TODO refactor with putListNgrams
226 -- The list must be non-empty!
227 -- The added ngrams must be non-existent!
228 addListNgrams :: RepoCmdM env err m
229 => NodeId -> NgramsType
230 -> [NgramsElement] -> m ()
231 addListNgrams listId ngramsType nes = do
232 var <- view repoVar
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
235 saveNodeStory
236 where
237 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
238 -}
239
240 -- | TODO: incr the Version number
241 -- && should use patch
242 -- UNSAFE
243
244 setListNgrams :: HasNodeStory env err m
245 => NodeId
246 -> TableNgrams.NgramsType
247 -> Map NgramsTerm NgramsRepoElement
248 -> m ()
249 setListNgrams listId ngramsType ns = do
250 printDebug "[setListNgrams]" (listId, ngramsType)
251 getter <- view hasNodeStory
252 var <- liftBase $ (getter ^. nse_getter) [listId]
253 liftBase $ modifyMVar_ var $
254 pure . ( unNodeStory
255 . at listId . _Just
256 . a_state
257 . at ngramsType
258 .~ Just ns
259 )
260 saveNodeStory
261
262
263 currentVersion :: HasNodeStory env err m
264 => ListId -> m Version
265 currentVersion listId = do
266 nls <- getRepo' [listId]
267 pure $ nls ^. unNodeStory . at listId . _Just . a_version
268
269
270 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
271 newNgramsFromNgramsStatePatch p =
272 [ text2ngrams (unNgramsTerm n)
273 | (n,np) <- p ^.. _PatchMap
274 -- . each . _PatchMap
275 . each . _NgramsTablePatch
276 . _PatchMap . ifolded . withIndex
277 , _ <- np ^.. patch_new . _Just
278 ]
279
280
281
282
283 commitStatePatch :: HasNodeStory env err m
284 => ListId
285 -> Versioned NgramsStatePatch'
286 -> m (Versioned NgramsStatePatch')
287 commitStatePatch listId (Versioned p_version p) = do
288 printDebug "[commitStatePatch]" listId
289 var <- getNodeStoryVar [listId]
290 vq' <- liftBase $ modifyMVar var $ \ns -> do
291 let
292 a = ns ^. unNodeStory . at listId . _Just
293 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
294 (p', q') = transformWith ngramsStatePatchConflictResolution p q
295 a' = a & a_version +~ 1
296 & a_state %~ act p'
297 & a_history %~ (p' :)
298
299 {-
300 -- Ideally we would like to check these properties. However:
301 -- * They should be checked only to debug the code. The client data
302 -- should be able to trigger these.
303 -- * What kind of error should they throw (we are in IO here)?
304 -- * Should we keep modifyMVar?
305 -- * Should we throw the validation in an Exception, catch it around
306 -- modifyMVar and throw it back as an Error?
307 assertValid $ transformable p q
308 assertValid $ applicable p' (r ^. r_state)
309 -}
310 printDebug "[commitStatePatch] a version" (a ^. a_version)
311 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
312 pure ( ns & unNodeStory . at listId .~ (Just a')
313 , Versioned (a' ^. a_version) q'
314 )
315 saveNodeStory
316 -- Save new ngrams
317 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
318
319 pure vq'
320
321
322
323 -- This is a special case of tableNgramsPut where the input patch is empty.
324 tableNgramsPull :: HasNodeStory env err m
325 => ListId
326 -> TableNgrams.NgramsType
327 -> Version
328 -> m (Versioned NgramsTablePatch)
329 tableNgramsPull listId ngramsType p_version = do
330 printDebug "[tableNgramsPull]" (listId, ngramsType)
331 var <- getNodeStoryVar [listId]
332 r <- liftBase $ readMVar var
333
334 let
335 a = r ^. unNodeStory . at listId . _Just
336 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
337 q_table = q ^. _PatchMap . at ngramsType . _Just
338
339 pure (Versioned (a ^. a_version) q_table)
340
341
342
343
344 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
345 -- Apply the given patch to the DB and returns the patch to be applied on the
346 -- client.
347 -- TODO-ACCESS check
348 tableNgramsPut :: ( HasNodeStory env err m
349 , HasInvalidError err
350 , HasSettings env
351 )
352 => TabType
353 -> ListId
354 -> Versioned NgramsTablePatch
355 -> m (Versioned NgramsTablePatch)
356 tableNgramsPut tabType listId (Versioned p_version p_table)
357 | p_table == mempty = do
358 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
359 let ngramsType = ngramsTypeFromTabType tabType
360 tableNgramsPull listId ngramsType p_version
361
362 | otherwise = do
363 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
364 let ngramsType = ngramsTypeFromTabType tabType
365 (p, p_validity) = PM.singleton ngramsType p_table
366
367 assertValid p_validity
368
369 ret <- commitStatePatch listId (Versioned p_version p)
370 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
371
372 pure ret
373
374
375
376 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
377 , FlowCmdM env err m
378 , HasNodeError err
379 , HasSettings env
380 )
381 => UpdateTableNgramsCharts
382 -> (JobLog -> m ())
383 -> m JobLog
384 tableNgramsPostChartsAsync utn logStatus = do
385 let tabType = utn ^. utn_tab_type
386 let listId = utn ^. utn_list_id
387
388 node <- getNode listId
389 let nId = node ^. node_id
390 _uId = node ^. node_user_id
391 mCId = node ^. node_parent_id
392
393 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
394 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
395
396 case mCId of
397 Nothing -> do
398 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
399 pure $ jobLogFail $ jobLogInit 1
400 Just cId -> do
401 case tabType of
402 Authors -> do
403 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
404 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
405 logRef
406 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
407 logRefSuccess
408
409 getRef
410 Institutes -> do
411 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
412 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
413 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
414 logRef
415 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
416 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
417 logRefSuccess
418 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
419 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
420 logRefSuccess
421 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
422 logRefSuccess
423
424 getRef
425 Sources -> do
426 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
427 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
428 logRef
429 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
430 logRefSuccess
431
432 getRef
433 Terms -> do
434 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
435 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
436 logRef
437 {-
438 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
439 logRefSuccess
440 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
441 logRefSuccess
442 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
443 logRefSuccess
444 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
445 logRefSuccess
446 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
447 logRefSuccess
448 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
449 -}
450 logRefSuccess
451
452 getRef
453 _ -> do
454 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
455 pure $ jobLogFail $ jobLogInit 1
456
457 {-
458 { _ne_list :: ListType
459 If we merge the parents/children we can potentially create cycles!
460 , _ne_parent :: Maybe NgramsTerm
461 , _ne_children :: MSet NgramsTerm
462 }
463 -}
464
465 getNgramsTableMap :: HasNodeStory env err m
466 => NodeId
467 -> TableNgrams.NgramsType
468 -> m (Versioned NgramsTableMap)
469 getNgramsTableMap nodeId ngramsType = do
470 v <- getNodeStoryVar [nodeId]
471 repo <- liftBase $ readMVar v
472 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
473 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
474
475
476 dumpJsonTableMap :: HasNodeStory env err m
477 => Text
478 -> NodeId
479 -> TableNgrams.NgramsType
480 -> m ()
481 dumpJsonTableMap fpath nodeId ngramsType = do
482 m <- getNgramsTableMap nodeId ngramsType
483 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
484 pure ()
485
486
487 type MinSize = Int
488 type MaxSize = Int
489
490 -- | TODO Errors management
491 -- TODO: polymorphic for Annuaire or Corpus or ...
492 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
493 -- TODO: should take only one ListId
494
495
496 getTableNgrams :: forall env err m.
497 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
498 => NodeType -> NodeId -> TabType
499 -> ListId -> Limit -> Maybe Offset
500 -> Maybe ListType
501 -> Maybe MinSize -> Maybe MaxSize
502 -> Maybe OrderBy
503 -> (NgramsTerm -> Bool)
504 -> m (VersionedWithCount NgramsTable)
505 getTableNgrams _nType nId tabType listId limit_ offset
506 listType minSize maxSize orderBy searchQuery = do
507
508 t0 <- getTime
509 -- lIds <- selectNodesWithUsername NodeList userMaster
510 let
511 ngramsType = ngramsTypeFromTabType tabType
512 offset' = maybe 0 identity offset
513 listType' = maybe (const True) (==) listType
514 minSize' = maybe (const True) (<=) minSize
515 maxSize' = maybe (const True) (>=) maxSize
516
517 selected_node n = minSize' s
518 && maxSize' s
519 && searchQuery (n ^. ne_ngrams)
520 && listType' (n ^. ne_list)
521 where
522 s = n ^. ne_size
523
524 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
525
526 ---------------------------------------
527 sortOnOrder Nothing = identity
528 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
529 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
530 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
531 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
532
533 ---------------------------------------
534
535 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
536 filteredNodes tableMap = rootOf <$> list & filter selected_node
537 where
538 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
539 (tableMap ^. at r)
540 )
541 (ne ^. ne_root)
542 list = tableMap ^.. each
543
544 ---------------------------------------
545 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
546 selectAndPaginate tableMap = roots <> inners
547 where
548 list = tableMap ^.. each
549 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
550 (tableMap ^. at r)
551 )
552 (ne ^. ne_root)
553 selected_nodes = list & take limit_
554 . drop offset'
555 . filter selected_node
556 . sortOnOrder orderBy
557 roots = rootOf <$> selected_nodes
558 rootsSet = Set.fromList (_ne_ngrams <$> roots)
559 inners = list & filter (selected_inner rootsSet)
560
561 ---------------------------------------
562 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
563 setScores False table = pure table
564 setScores True table = do
565 let ngrams_terms = table ^.. each . ne_ngrams
566 t1 <- getTime
567 occurrences <- getOccByNgramsOnlyFast' nId
568 listId
569 ngramsType
570 ngrams_terms
571 t2 <- getTime
572 liftBase $ hprint stderr
573 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
574 (length ngrams_terms) t1 t2
575 {-
576 occurrences <- getOccByNgramsOnlySlow nType nId
577 (lIds <> [listId])
578 ngramsType
579 ngrams_terms
580 -}
581 let
582 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
583
584 pure $ table & each %~ setOcc
585 ---------------------------------------
586
587 -- lists <- catMaybes <$> listsWith userMaster
588 -- trace (show lists) $
589 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
590
591
592 let scoresNeeded = needsScores orderBy
593 tableMap1 <- getNgramsTableMap listId ngramsType
594 t1 <- getTime
595 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
596 . Map.mapWithKey ngramsElementFromRepo
597
598 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
599 . filteredNodes
600 let fltrCount = length $ fltr ^. v_data . _NgramsTable
601
602 t2 <- getTime
603 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
604 . setScores (not scoresNeeded)
605 . selectAndPaginate
606 t3 <- getTime
607 liftBase $ hprint stderr
608 ("getTableNgrams total=" % hasTime
609 % " map1=" % hasTime
610 % " map2=" % hasTime
611 % " map3=" % hasTime
612 % " sql=" % (if scoresNeeded then "map2" else "map3")
613 % "\n"
614 ) t0 t3 t0 t1 t1 t2 t2 t3
615 pure $ toVersionedWithCount fltrCount tableMap3
616
617
618
619 scoresRecomputeTableNgrams :: forall env err m.
620 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
621 => NodeId -> TabType -> ListId -> m Int
622 scoresRecomputeTableNgrams nId tabType listId = do
623 tableMap <- getNgramsTableMap listId ngramsType
624 _ <- tableMap & v_data %%~ setScores
625 . Map.mapWithKey ngramsElementFromRepo
626
627 pure $ 1
628 where
629 ngramsType = ngramsTypeFromTabType tabType
630
631 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
632 setScores table = do
633 let ngrams_terms = table ^.. each . ne_ngrams
634 occurrences <- getOccByNgramsOnlyFast' nId
635 listId
636 ngramsType
637 ngrams_terms
638 let
639 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
640
641 pure $ table & each %~ setOcc
642
643
644
645
646 -- APIs
647
648 -- TODO: find a better place for the code above, All APIs stay here
649
650 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
651 deriving (Generic, Enum, Bounded, Read, Show)
652
653 instance FromHttpApiData OrderBy
654 where
655 parseUrlPiece "TermAsc" = pure TermAsc
656 parseUrlPiece "TermDesc" = pure TermDesc
657 parseUrlPiece "ScoreAsc" = pure ScoreAsc
658 parseUrlPiece "ScoreDesc" = pure ScoreDesc
659 parseUrlPiece _ = Left "Unexpected value of OrderBy"
660
661
662 instance ToParamSchema OrderBy
663 instance FromJSON OrderBy
664 instance ToJSON OrderBy
665 instance ToSchema OrderBy
666 instance Arbitrary OrderBy
667 where
668 arbitrary = elements [minBound..maxBound]
669
670 needsScores :: Maybe OrderBy -> Bool
671 needsScores (Just ScoreAsc) = True
672 needsScores (Just ScoreDesc) = True
673 needsScores _ = False
674
675 type TableNgramsApiGet = Summary " Table Ngrams API Get"
676 :> QueryParamR "ngramsType" TabType
677 :> QueryParamR "list" ListId
678 :> QueryParamR "limit" Limit
679 :> QueryParam "offset" Offset
680 :> QueryParam "listType" ListType
681 :> QueryParam "minTermSize" MinSize
682 :> QueryParam "maxTermSize" MaxSize
683 :> QueryParam "orderBy" OrderBy
684 :> QueryParam "search" Text
685 :> Get '[JSON] (VersionedWithCount NgramsTable)
686
687 type TableNgramsApiPut = Summary " Table Ngrams API Change"
688 :> QueryParamR "ngramsType" TabType
689 :> QueryParamR "list" ListId
690 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
691 :> Put '[JSON] (Versioned NgramsTablePatch)
692
693 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
694 :> QueryParamR "ngramsType" TabType
695 :> QueryParamR "list" ListId
696 :> "recompute" :> Post '[JSON] Int
697
698 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
699 :> QueryParamR "ngramsType" TabType
700 :> QueryParamR "list" ListId
701 :> Get '[JSON] Version
702
703 type TableNgramsApi = TableNgramsApiGet
704 :<|> TableNgramsApiPut
705 :<|> RecomputeScoresNgramsApiGet
706 :<|> "version" :> TableNgramsApiGetVersion
707 :<|> TableNgramsAsyncApi
708
709 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
710 :> "async"
711 :> "charts"
712 :> "update"
713 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
714
715 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
716 => NodeId
717 -> TabType
718 -> ListId
719 -> Limit
720 -> Maybe Offset
721 -> Maybe ListType
722 -> Maybe MinSize -> Maybe MaxSize
723 -> Maybe OrderBy
724 -> Maybe Text -- full text search
725 -> m (VersionedWithCount NgramsTable)
726 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
727 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
728 where
729 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
730
731
732
733 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
734 => NodeId
735 -> TabType
736 -> ListId
737 -> m Version
738 getTableNgramsVersion _nId _tabType listId = currentVersion listId
739
740
741
742 -- TODO: limit?
743 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
744 -- This line above looks like a waste of computation to finally get only the version.
745 -- See the comment about listNgramsChangedSince.
746
747
748 -- | Text search is deactivated for now for ngrams by doc only
749 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
750 => DocId -> TabType
751 -> ListId -> Limit -> Maybe Offset
752 -> Maybe ListType
753 -> Maybe MinSize -> Maybe MaxSize
754 -> Maybe OrderBy
755 -> Maybe Text -- full text search
756 -> m (VersionedWithCount NgramsTable)
757 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
758 ns <- selectNodesWithUsername NodeList userMaster
759 let ngramsType = ngramsTypeFromTabType tabType
760 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
761 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
762 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
763
764
765
766 apiNgramsTableCorpus :: ( GargServerC env err m
767 )
768 => NodeId -> ServerT TableNgramsApi m
769 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
770 :<|> tableNgramsPut
771 :<|> scoresRecomputeTableNgrams cId
772 :<|> getTableNgramsVersion cId
773 :<|> apiNgramsAsync cId
774
775 apiNgramsTableDoc :: ( GargServerC env err m
776 )
777 => DocId -> ServerT TableNgramsApi m
778 apiNgramsTableDoc dId = getTableNgramsDoc dId
779 :<|> tableNgramsPut
780 :<|> scoresRecomputeTableNgrams dId
781 :<|> getTableNgramsVersion dId
782 :<|> apiNgramsAsync dId
783
784 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
785 apiNgramsAsync _dId =
786 serveJobsAPI $
787 JobFunction $ \i log ->
788 let
789 log' x = do
790 printDebug "tableNgramsPostChartsAsync" x
791 liftBase $ log x
792 in tableNgramsPostChartsAsync i log'
793
794 -- Did the given list of ngrams changed since the given version?
795 -- The returned value is versioned boolean value, meaning that one always retrieve the
796 -- latest version.
797 -- If the given version is negative then one simply receive the latest version and True.
798 -- Using this function is more precise than simply comparing the latest version number
799 -- with the local version number. Indeed there might be no change to this particular list
800 -- and still the version number has changed because of other lists.
801 --
802 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
803 -- * currentVersion: good computation, good bandwidth, bad precision.
804 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
805 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
806 listNgramsChangedSince :: HasNodeStory env err m
807 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
808 listNgramsChangedSince listId ngramsType version
809 | version < 0 =
810 Versioned <$> currentVersion listId <*> pure True
811 | otherwise =
812 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
813
814