]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[VERSION] +1 to 0.0.6.1
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , apiNgramsTableCorpus
35 , apiNgramsTableDoc
36
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepoElement(..)
56 , saveNodeStory
57 , initRepo
58
59 , TabType(..)
60
61 , QueryParamR
62 , TODO
63
64 -- Internals
65 , getNgramsTableMap
66 , dumpJsonTableMap
67 , tableNgramsPull
68 , tableNgramsPut
69
70 , Version
71 , Versioned(..)
72 , VersionedWithCount(..)
73 , currentVersion
74 , listNgramsChangedSince
75 , MinSize, MaxSize, OrderBy, NgramsTable
76 , UpdateTableNgramsCharts
77 )
78 where
79
80 import Control.Concurrent
81 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
82 import Control.Monad.Reader
83 import Data.Aeson hiding ((.=))
84 import Data.Either (Either(..))
85 import Data.Foldable
86 import Data.Map.Strict (Map)
87 import Data.Maybe (fromMaybe)
88 import Data.Monoid
89 import Data.Ord (Down(..))
90 import Data.Patch.Class (Action(act), Transformable(..), ours)
91 import Data.Swagger hiding (version, patch)
92 import Data.Text (Text, isInfixOf, unpack, pack)
93 import Data.Text.Lazy.IO as DTL
94 import Formatting (hprint, int, (%))
95 import GHC.Generics (Generic)
96 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
97 import Gargantext.API.Admin.Types (HasSettings)
98 import Gargantext.API.Job
99 import Gargantext.API.Ngrams.Types
100 import Gargantext.API.Prelude
101 import Gargantext.Core.NodeStory
102 import Gargantext.Core.Mail.Types (HasMail)
103 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
104 import Gargantext.API.Ngrams.Tools
105 import Gargantext.Database.Action.Flow.Types
106 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
107 import Gargantext.Database.Admin.Config (userMaster)
108 import Gargantext.Database.Admin.Types.Node (NodeType(..))
109 import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
110 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
111 import Gargantext.Database.Query.Table.Node (getNode)
112 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
113 import Gargantext.Database.Query.Table.Node.Select
114 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
115 import Gargantext.Prelude hiding (log)
116 import Gargantext.Prelude.Clock (hasTime, getTime)
117 import Prelude (error)
118 import Servant hiding (Patch)
119 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
120 import System.IO (stderr)
121 import Test.QuickCheck (elements)
122 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
123 import qualified Data.Aeson.Text as DAT
124 import qualified Data.List as List
125 import qualified Data.Map.Strict as Map
126 import qualified Data.Map.Strict.Patch as PM
127 import qualified Data.Set as S
128 import qualified Data.Set as Set
129 import qualified Gargantext.API.Metrics as Metrics
130 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
131
132 {-
133 -- TODO sequences of modifications (Patchs)
134 type NgramsIdPatch = Patch NgramsId NgramsPatch
135
136 ngramsPatch :: Int -> NgramsPatch
137 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
138
139 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
140 toEdit n p = Edit n p
141 ngramsIdPatch :: Patch NgramsId NgramsPatch
142 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
143 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
144 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
145 ]
146
147 -- applyPatchBack :: Patch -> IO Patch
148 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
149 -}
150 ------------------------------------------------------------------------
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
153
154 {-
155 -- TODO: Replace.old is ignored which means that if the current list
156 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
157 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
158 -- However this should not happen in non conflicting situations.
159 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
160 mkListsUpdate nt patches =
161 [ (ngramsTypeId nt, ng, listTypeId lt)
162 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
163 , lt <- patch ^.. patch_list . new
164 ]
165
166 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
167 -> NgramsType
168 -> NgramsTablePatch
169 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
170 mkChildrenGroups addOrRem nt patches =
171 [ (ngramsTypeId nt, parent, child)
172 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , child <- patch ^.. patch_children . to addOrRem . folded
174 ]
175 -}
176
177 ------------------------------------------------------------------------
178
179 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
180 => m ()
181 saveNodeStory = liftBase =<< view hasNodeStorySaver
182
183
184 listTypeConflictResolution :: ListType -> ListType -> ListType
185 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
186
187
188 ngramsStatePatchConflictResolution
189 :: TableNgrams.NgramsType
190 -> NgramsTerm
191 -> ConflictResolutionNgramsPatch
192 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
193 = (ours, (const ours, ours), (False, False))
194 -- (False, False) mean here that Mod has always priority.
195 -- (True, False) <- would mean priority to the left (same as ours).
196 -- undefined {- TODO think this through -}, listTypeConflictResolution)
197
198
199
200
201 -- Current state:
202 -- Insertions are not considered as patches,
203 -- they do not extend history,
204 -- they do not bump version.
205 insertNewOnly :: a -> Maybe b -> a
206 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
207 -- TODO error handling
208
209 {- unused
210 -- TODO refactor with putListNgrams
211 copyListNgrams :: RepoCmdM env err m
212 => NodeId -> NodeId -> NgramsType
213 -> m ()
214 copyListNgrams srcListId dstListId ngramsType = do
215 var <- view repoVar
216 liftBase $ modifyMVar_ var $
217 pure . (r_state . at ngramsType %~ (Just . f . something))
218 saveNodeStory
219 where
220 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
221 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
222
223 -- TODO refactor with putListNgrams
224 -- The list must be non-empty!
225 -- The added ngrams must be non-existent!
226 addListNgrams :: RepoCmdM env err m
227 => NodeId -> NgramsType
228 -> [NgramsElement] -> m ()
229 addListNgrams listId ngramsType nes = do
230 var <- view repoVar
231 liftBase $ modifyMVar_ var $
232 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
233 saveNodeStory
234 where
235 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
236 -}
237
238 -- | TODO: incr the Version number
239 -- && should use patch
240 -- UNSAFE
241
242 setListNgrams :: HasNodeStory env err m
243 => NodeId
244 -> TableNgrams.NgramsType
245 -> Map NgramsTerm NgramsRepoElement
246 -> m ()
247 setListNgrams listId ngramsType ns = do
248 -- printDebug "[setListNgrams]" (listId, ngramsType)
249 getter <- view hasNodeStory
250 var <- liftBase $ (getter ^. nse_getter) [listId]
251 liftBase $ modifyMVar_ var $
252 pure . ( unNodeStory
253 . at listId . _Just
254 . a_state
255 . at ngramsType
256 .~ Just ns
257 )
258 saveNodeStory
259
260
261 currentVersion :: HasNodeStory env err m
262 => ListId -> m Version
263 currentVersion listId = do
264 --nls <- getRepo [listId]
265 pool <- view connPool
266 nls <- liftBase $ getNodeStory pool listId
267 pure $ nls ^. unNodeStory . at listId . _Just . a_version
268
269
270 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
271 newNgramsFromNgramsStatePatch p =
272 [ text2ngrams (unNgramsTerm n)
273 | (n,np) <- p ^.. _PatchMap
274 -- . each . _PatchMap
275 . each . _NgramsTablePatch
276 . _PatchMap . ifolded . withIndex
277 , _ <- np ^.. patch_new . _Just
278 ]
279
280
281
282
283 commitStatePatch :: (HasNodeStory env err m, HasMail env)
284 => ListId
285 -> Versioned NgramsStatePatch'
286 -> m (Versioned NgramsStatePatch')
287 commitStatePatch listId (Versioned _p_version p) = do
288 -- printDebug "[commitStatePatch]" listId
289 var <- getNodeStoryVar [listId]
290 vq' <- liftBase $ modifyMVar var $ \ns -> do
291 let
292 a = ns ^. unNodeStory . at listId . _Just
293 -- apply patches from version p_version to a ^. a_version
294 -- TODO Check this
295 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
296 q = mconcat $ a ^. a_history
297 (p', q') = transformWith ngramsStatePatchConflictResolution p q
298 a' = a & a_version +~ 1
299 & a_state %~ act p'
300 & a_history %~ (p' :)
301
302 {-
303 -- Ideally we would like to check these properties. However:
304 -- * They should be checked only to debug the code. The client data
305 -- should be able to trigger these.
306 -- * What kind of error should they throw (we are in IO here)?
307 -- * Should we keep modifyMVar?
308 -- * Should we throw the validation in an Exception, catch it around
309 -- modifyMVar and throw it back as an Error?
310 assertValid $ transformable p q
311 assertValid $ applicable p' (r ^. r_state)
312 -}
313 printDebug "[commitStatePatch] a version" (a ^. a_version)
314 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
315 pure ( ns & unNodeStory . at listId .~ (Just a')
316 , Versioned (a' ^. a_version) q'
317 )
318 saveNodeStory
319 -- Save new ngrams
320 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
321
322 pure vq'
323
324
325
326 -- This is a special case of tableNgramsPut where the input patch is empty.
327 tableNgramsPull :: HasNodeStory env err m
328 => ListId
329 -> TableNgrams.NgramsType
330 -> Version
331 -> m (Versioned NgramsTablePatch)
332 tableNgramsPull listId ngramsType p_version = do
333 printDebug "[tableNgramsPull]" (listId, ngramsType)
334 var <- getNodeStoryVar [listId]
335 r <- liftBase $ readMVar var
336
337 let
338 a = r ^. unNodeStory . at listId . _Just
339 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
340 q_table = q ^. _PatchMap . at ngramsType . _Just
341
342 pure (Versioned (a ^. a_version) q_table)
343
344
345
346
347 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
348 -- Apply the given patch to the DB and returns the patch to be applied on the
349 -- client.
350 -- TODO-ACCESS check
351 tableNgramsPut :: ( HasNodeStory env err m
352 , HasInvalidError err
353 , HasSettings env
354 , HasMail env
355 )
356 => TabType
357 -> ListId
358 -> Versioned NgramsTablePatch
359 -> m (Versioned NgramsTablePatch)
360 tableNgramsPut tabType listId (Versioned p_version p_table)
361 | p_table == mempty = do
362 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
363 let ngramsType = ngramsTypeFromTabType tabType
364 tableNgramsPull listId ngramsType p_version
365
366 | otherwise = do
367 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
368 let ngramsType = ngramsTypeFromTabType tabType
369 (p, p_validity) = PM.singleton ngramsType p_table
370
371 assertValid p_validity
372
373 ret <- commitStatePatch listId (Versioned p_version p)
374 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
375
376 pure ret
377
378
379
380 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
381 , FlowCmdM env err m
382 , HasNodeError err
383 , HasSettings env
384 )
385 => UpdateTableNgramsCharts
386 -> (JobLog -> m ())
387 -> m JobLog
388 tableNgramsPostChartsAsync utn logStatus = do
389 let tabType = utn ^. utn_tab_type
390 let listId = utn ^. utn_list_id
391
392 node <- getNode listId
393 let nId = node ^. node_id
394 _uId = node ^. node_user_id
395 mCId = node ^. node_parent_id
396
397 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
398 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
399
400 case mCId of
401 Nothing -> do
402 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
403 pure $ jobLogFail $ jobLogInit 1
404 Just cId -> do
405 case tabType of
406 Authors -> do
407 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
408 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
409 logRef
410 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
411 logRefSuccess
412
413 getRef
414 Institutes -> do
415 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
416 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
417 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
418 logRef
419 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
420 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
421 logRefSuccess
422 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
423 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
424 logRefSuccess
425 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
426 logRefSuccess
427
428 getRef
429 Sources -> do
430 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
431 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
432 logRef
433 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
434 logRefSuccess
435
436 getRef
437 Terms -> do
438 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
439 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
440 logRef
441 {-
442 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
443 logRefSuccess
444 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
445 logRefSuccess
446 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
447 logRefSuccess
448 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
449 logRefSuccess
450 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
451 logRefSuccess
452 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
453 -}
454 logRefSuccess
455
456 getRef
457 _ -> do
458 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
459 pure $ jobLogFail $ jobLogInit 1
460
461 {-
462 { _ne_list :: ListType
463 If we merge the parents/children we can potentially create cycles!
464 , _ne_parent :: Maybe NgramsTerm
465 , _ne_children :: MSet NgramsTerm
466 }
467 -}
468
469 getNgramsTableMap :: HasNodeStory env err m
470 => NodeId
471 -> TableNgrams.NgramsType
472 -> m (Versioned NgramsTableMap)
473 getNgramsTableMap nodeId ngramsType = do
474 v <- getNodeStoryVar [nodeId]
475 repo <- liftBase $ readMVar v
476 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
477 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
478
479
480 dumpJsonTableMap :: HasNodeStory env err m
481 => Text
482 -> NodeId
483 -> TableNgrams.NgramsType
484 -> m ()
485 dumpJsonTableMap fpath nodeId ngramsType = do
486 m <- getNgramsTableMap nodeId ngramsType
487 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
488 pure ()
489
490
491 type MinSize = Int
492 type MaxSize = Int
493
494 -- | TODO Errors management
495 -- TODO: polymorphic for Annuaire or Corpus or ...
496 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
497 -- TODO: should take only one ListId
498
499
500 getTableNgrams :: forall env err m.
501 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
502 => NodeType -> NodeId -> TabType
503 -> ListId -> Limit -> Maybe Offset
504 -> Maybe ListType
505 -> Maybe MinSize -> Maybe MaxSize
506 -> Maybe OrderBy
507 -> (NgramsTerm -> Bool)
508 -> m (VersionedWithCount NgramsTable)
509 getTableNgrams _nType nId tabType listId limit_ offset
510 listType minSize maxSize orderBy searchQuery = do
511
512 t0 <- getTime
513 -- lIds <- selectNodesWithUsername NodeList userMaster
514 let
515 ngramsType = ngramsTypeFromTabType tabType
516 offset' = maybe 0 identity offset
517 listType' = maybe (const True) (==) listType
518 minSize' = maybe (const True) (<=) minSize
519 maxSize' = maybe (const True) (>=) maxSize
520
521 selected_node n = minSize' s
522 && maxSize' s
523 && searchQuery (n ^. ne_ngrams)
524 && listType' (n ^. ne_list)
525 where
526 s = n ^. ne_size
527
528 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
529
530 ---------------------------------------
531 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
532 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
533 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
534 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
535 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
536
537 ---------------------------------------
538 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
539 filteredNodes tableMap = rootOf <$> list & filter selected_node
540 where
541 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
542 (tableMap ^. at r)
543 )
544 (ne ^. ne_root)
545 list = tableMap ^.. each
546
547 ---------------------------------------
548 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
549 selectAndPaginate tableMap = roots <> inners
550 where
551 list = tableMap ^.. each
552 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
553 (tableMap ^. at r)
554 )
555 (ne ^. ne_root)
556 selected_nodes = list & take limit_
557 . drop offset'
558 . filter selected_node
559 . sortOnOrder orderBy
560 roots = rootOf <$> selected_nodes
561 rootsSet = Set.fromList (_ne_ngrams <$> roots)
562 inners = list & filter (selected_inner rootsSet)
563
564 ---------------------------------------
565 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
566 setScores False table = pure table
567 setScores True table = do
568 let ngrams_terms = table ^.. each . ne_ngrams
569 -- printDebug "ngrams_terms" ngrams_terms
570 t1 <- getTime
571 occurrences <- getOccByNgramsOnlyFast' nId
572 listId
573 ngramsType
574 ngrams_terms
575 --printDebug "occurrences" occurrences
576 t2 <- getTime
577 liftBase $ hprint stderr
578 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
579 (length ngrams_terms) t1 t2
580 let
581 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
582
583 pure $ table & each %~ setOcc
584 ---------------------------------------
585
586 -- lists <- catMaybes <$> listsWith userMaster
587 -- trace (show lists) $
588 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
589
590
591 let scoresNeeded = needsScores orderBy
592 tableMap1 <- getNgramsTableMap listId ngramsType
593 t1 <- getTime
594
595 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
596 . Map.mapWithKey ngramsElementFromRepo
597
598 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
599 . filteredNodes
600
601 let fltrCount = length $ fltr ^. v_data . _NgramsTable
602
603 t2 <- getTime
604 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
605 . setScores (not scoresNeeded)
606 . selectAndPaginate
607 t3 <- getTime
608 liftBase $ hprint stderr
609 ("getTableNgrams total=" % hasTime
610 % " map1=" % hasTime
611 % " map2=" % hasTime
612 % " map3=" % hasTime
613 % " sql=" % (if scoresNeeded then "map2" else "map3")
614 % "\n"
615 ) t0 t3 t0 t1 t1 t2 t2 t3
616 pure $ toVersionedWithCount fltrCount tableMap3
617
618
619
620 scoresRecomputeTableNgrams :: forall env err m.
621 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
622 => NodeId -> TabType -> ListId -> m Int
623 scoresRecomputeTableNgrams nId tabType listId = do
624 tableMap <- getNgramsTableMap listId ngramsType
625 _ <- tableMap & v_data %%~ setScores
626 . Map.mapWithKey ngramsElementFromRepo
627
628 pure $ 1
629 where
630 ngramsType = ngramsTypeFromTabType tabType
631
632 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
633 setScores table = do
634 let ngrams_terms = table ^.. each . ne_ngrams
635 occurrences <- getOccByNgramsOnlyFast' nId
636 listId
637 ngramsType
638 ngrams_terms
639 let
640 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
641
642 pure $ table & each %~ setOcc
643
644
645
646
647 -- APIs
648
649 -- TODO: find a better place for the code above, All APIs stay here
650
651 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
652 deriving (Generic, Enum, Bounded, Read, Show)
653
654 instance FromHttpApiData OrderBy
655 where
656 parseUrlPiece "TermAsc" = pure TermAsc
657 parseUrlPiece "TermDesc" = pure TermDesc
658 parseUrlPiece "ScoreAsc" = pure ScoreAsc
659 parseUrlPiece "ScoreDesc" = pure ScoreDesc
660 parseUrlPiece _ = Left "Unexpected value of OrderBy"
661
662 instance ToHttpApiData OrderBy where
663 toUrlPiece = pack . show
664
665 instance ToParamSchema OrderBy
666 instance FromJSON OrderBy
667 instance ToJSON OrderBy
668 instance ToSchema OrderBy
669 instance Arbitrary OrderBy
670 where
671 arbitrary = elements [minBound..maxBound]
672
673 needsScores :: Maybe OrderBy -> Bool
674 needsScores (Just ScoreAsc) = True
675 needsScores (Just ScoreDesc) = True
676 needsScores _ = False
677
678 type TableNgramsApiGet = Summary " Table Ngrams API Get"
679 :> QueryParamR "ngramsType" TabType
680 :> QueryParamR "list" ListId
681 :> QueryParamR "limit" Limit
682 :> QueryParam "offset" Offset
683 :> QueryParam "listType" ListType
684 :> QueryParam "minTermSize" MinSize
685 :> QueryParam "maxTermSize" MaxSize
686 :> QueryParam "orderBy" OrderBy
687 :> QueryParam "search" Text
688 :> Get '[JSON] (VersionedWithCount NgramsTable)
689
690 type TableNgramsApiPut = Summary " Table Ngrams API Change"
691 :> QueryParamR "ngramsType" TabType
692 :> QueryParamR "list" ListId
693 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
694 :> Put '[JSON] (Versioned NgramsTablePatch)
695
696 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
697 :> QueryParamR "ngramsType" TabType
698 :> QueryParamR "list" ListId
699 :> "recompute" :> Post '[JSON] Int
700
701 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
702 :> QueryParamR "ngramsType" TabType
703 :> QueryParamR "list" ListId
704 :> Get '[JSON] Version
705
706 type TableNgramsApi = TableNgramsApiGet
707 :<|> TableNgramsApiPut
708 :<|> RecomputeScoresNgramsApiGet
709 :<|> "version" :> TableNgramsApiGetVersion
710 :<|> TableNgramsAsyncApi
711
712 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
713 :> "async"
714 :> "charts"
715 :> "update"
716 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
717
718 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
719 => NodeId
720 -> TabType
721 -> ListId
722 -> Limit
723 -> Maybe Offset
724 -> Maybe ListType
725 -> Maybe MinSize -> Maybe MaxSize
726 -> Maybe OrderBy
727 -> Maybe Text -- full text search
728 -> m (VersionedWithCount NgramsTable)
729 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
730 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
731 where
732 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
733
734
735
736 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
737 => NodeId
738 -> TabType
739 -> ListId
740 -> m Version
741 getTableNgramsVersion _nId _tabType listId = currentVersion listId
742
743
744
745 -- TODO: limit?
746 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
747 -- This line above looks like a waste of computation to finally get only the version.
748 -- See the comment about listNgramsChangedSince.
749
750
751 -- | Text search is deactivated for now for ngrams by doc only
752 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
753 => DocId -> TabType
754 -> ListId -> Limit -> Maybe Offset
755 -> Maybe ListType
756 -> Maybe MinSize -> Maybe MaxSize
757 -> Maybe OrderBy
758 -> Maybe Text -- full text search
759 -> m (VersionedWithCount NgramsTable)
760 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
761 ns <- selectNodesWithUsername NodeList userMaster
762 let ngramsType = ngramsTypeFromTabType tabType
763 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
764 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
765 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
766
767
768
769 apiNgramsTableCorpus :: ( GargServerC env err m
770 )
771 => NodeId -> ServerT TableNgramsApi m
772 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
773 :<|> tableNgramsPut
774 :<|> scoresRecomputeTableNgrams cId
775 :<|> getTableNgramsVersion cId
776 :<|> apiNgramsAsync cId
777
778 apiNgramsTableDoc :: ( GargServerC env err m
779 )
780 => DocId -> ServerT TableNgramsApi m
781 apiNgramsTableDoc dId = getTableNgramsDoc dId
782 :<|> tableNgramsPut
783 :<|> scoresRecomputeTableNgrams dId
784 :<|> getTableNgramsVersion dId
785 :<|> apiNgramsAsync dId
786
787 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
788 apiNgramsAsync _dId =
789 serveJobsAPI $
790 JobFunction $ \i log ->
791 let
792 log' x = do
793 printDebug "tableNgramsPostChartsAsync" x
794 liftBase $ log x
795 in tableNgramsPostChartsAsync i log'
796
797 -- Did the given list of ngrams changed since the given version?
798 -- The returned value is versioned boolean value, meaning that one always retrieve the
799 -- latest version.
800 -- If the given version is negative then one simply receive the latest version and True.
801 -- Using this function is more precise than simply comparing the latest version number
802 -- with the local version number. Indeed there might be no change to this particular list
803 -- and still the version number has changed because of other lists.
804 --
805 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
806 -- * currentVersion: good computation, good bandwidth, bad precision.
807 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
808 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
809 listNgramsChangedSince :: HasNodeStory env err m
810 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
811 listNgramsChangedSince listId ngramsType version
812 | version < 0 =
813 Versioned <$> currentVersion listId <*> pure True
814 | otherwise =
815 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)