]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
Merge branch 'dev' into dev-sources-chart-sort
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , apiNgramsTableCorpus
35 , apiNgramsTableDoc
36
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepoElement(..)
56 , saveNodeStory
57 , initRepo
58
59 , TabType(..)
60
61 , QueryParamR
62 , TODO
63
64 -- Internals
65 , getNgramsTableMap
66 , dumpJsonTableMap
67 , tableNgramsPull
68 , tableNgramsPut
69
70 , Version
71 , Versioned(..)
72 , VersionedWithCount(..)
73 , currentVersion
74 , listNgramsChangedSince
75 , MinSize, MaxSize, OrderBy, NgramsTable
76 , UpdateTableNgramsCharts
77 )
78 where
79
80 import Control.Concurrent
81 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
82 import Control.Monad.Reader
83 import Data.Aeson hiding ((.=))
84 import Data.Either (Either(..))
85 import Data.Foldable
86 import Data.Map.Strict (Map)
87 import Data.Maybe (fromMaybe)
88 import Data.Monoid
89 import Data.Ord (Down(..))
90 import Data.Patch.Class (Action(act), Transformable(..), ours)
91 import Data.Swagger hiding (version, patch)
92 import Data.Text (Text, isInfixOf, unpack, pack)
93 import Data.Text.Lazy.IO as DTL
94 import Formatting (hprint, int, (%))
95 import GHC.Generics (Generic)
96 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
97 import Gargantext.API.Admin.Types (HasSettings)
98 import Gargantext.API.Job
99 import Gargantext.API.Ngrams.Types
100 import Gargantext.API.Prelude
101 import Gargantext.Core.NodeStory
102 import Gargantext.Core.Mail.Types (HasMail)
103 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
104 import Gargantext.API.Ngrams.Tools
105 import Gargantext.Database.Action.Flow.Types
106 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
107 import Gargantext.Database.Admin.Config (userMaster)
108 import Gargantext.Database.Admin.Types.Node (NodeType(..))
109 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
110 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
111 import Gargantext.Database.Query.Table.Node (getNode)
112 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
113 import Gargantext.Database.Query.Table.Node.Select
114 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
115 import Gargantext.Prelude hiding (log)
116 import Gargantext.Prelude.Clock (hasTime, getTime)
117 import Prelude (error)
118 import Servant hiding (Patch)
119 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
120 import System.IO (stderr)
121 import Test.QuickCheck (elements)
122 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
123 import qualified Data.Aeson.Text as DAT
124 import qualified Data.List as List
125 import qualified Data.Map.Strict as Map
126 import qualified Data.Map.Strict.Patch as PM
127 import qualified Data.Set as S
128 import qualified Data.Set as Set
129 import qualified Gargantext.API.Metrics as Metrics
130 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
131
132 {-
133 -- TODO sequences of modifications (Patchs)
134 type NgramsIdPatch = Patch NgramsId NgramsPatch
135
136 ngramsPatch :: Int -> NgramsPatch
137 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
138
139 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
140 toEdit n p = Edit n p
141 ngramsIdPatch :: Patch NgramsId NgramsPatch
142 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
143 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
144 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
145 ]
146
147 -- applyPatchBack :: Patch -> IO Patch
148 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
149 -}
150 ------------------------------------------------------------------------
151 ------------------------------------------------------------------------
152 ------------------------------------------------------------------------
153
154 {-
-- TODO: Replace.old is ignored, which means that if the current list is
-- `MapTerm` and the patch is `Replace CandidateTerm StopTerm`, then
-- the list is going to be `StopTerm` while it should stay `MapTerm`.
-- However, this should not happen in non-conflicting situations.
159 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
160 mkListsUpdate nt patches =
161 [ (ngramsTypeId nt, ng, listTypeId lt)
162 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
163 , lt <- patch ^.. patch_list . new
164 ]
165
166 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
167 -> NgramsType
168 -> NgramsTablePatch
169 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
170 mkChildrenGroups addOrRem nt patches =
171 [ (ngramsTypeId nt, parent, child)
172 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
173 , child <- patch ^.. patch_children . to addOrRem . folded
174 ]
175 -}
176
177 ------------------------------------------------------------------------
178
-- | Run the node-story saving action found in the environment.
--
-- The action is read through 'hasNodeStorySaver' and then lifted from 'IO'
-- into the current monad.
saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
              => m ()
saveNodeStory = do
  saver <- view hasNodeStorySaver
  liftBase saver
182
183
-- | Placeholder for resolving a conflict between two 'ListType's.
--
-- Not implemented yet (TODO: use @Map User ListType@). Evaluating the result
-- raises an error that names this function, so a stray call can be traced
-- back here instead of failing with an anonymous @undefined@.
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ =
  error "listTypeConflictResolution: not implemented (TODO Use Map User ListType)"
186
187
-- | Conflict-resolution policy used when transforming ngrams state patches.
--
-- The ngrams type and the term are currently ignored: the same policy is
-- returned for every entry.
ngramsStatePatchConflictResolution
  :: TableNgrams.NgramsType
  -> NgramsTerm
  -> ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution _ _ =
  ( ours
  , (const ours, ours)
  , (False, False)
    -- (False, False) means here that Mod always has priority.
    -- (True, False) would mean priority to the left (same as ours).
  )
  -- undefined {- TODO think this through -}, listTypeConflictResolution)
197
198
199
200
-- Current state:
--   Insertions are not considered as patches,
--   they do not extend history,
--   they do not bump version.

-- | Return the first argument when the second one is 'Nothing'.
--
-- A 'Just' in the second position is treated as an impossible situation and
-- raises an error.
-- TODO error handling
insertNewOnly :: a -> Maybe b -> a
insertNewOnly m Nothing  = m
insertNewOnly _ (Just _) = error "insertNewOnly: impossible"
208
209 {- unused
210 -- TODO refactor with putListNgrams
211 copyListNgrams :: RepoCmdM env err m
212 => NodeId -> NodeId -> NgramsType
213 -> m ()
214 copyListNgrams srcListId dstListId ngramsType = do
215 var <- view repoVar
216 liftBase $ modifyMVar_ var $
217 pure . (r_state . at ngramsType %~ (Just . f . something))
218 saveNodeStory
219 where
220 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
221 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
222
223 -- TODO refactor with putListNgrams
224 -- The list must be non-empty!
225 -- The added ngrams must be non-existent!
226 addListNgrams :: RepoCmdM env err m
227 => NodeId -> NgramsType
228 -> [NgramsElement] -> m ()
229 addListNgrams listId ngramsType nes = do
230 var <- view repoVar
231 liftBase $ modifyMVar_ var $
232 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
233 saveNodeStory
234 where
235 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
236 -}
237
-- | Overwrite the ngrams of the given type stored for a list.
--
-- TODO: incr the Version number
--       && should use patch
-- UNSAFE
--
-- The update goes through @at listId . _Just@, so nothing is written when
-- the node story has no entry for @listId@. The node story is saved
-- afterwards.
setListNgrams :: HasNodeStory env err m
              => NodeId
              -> TableNgrams.NgramsType
              -> Map NgramsTerm NgramsRepoElement
              -> m ()
setListNgrams listId ngramsType ns = do
  printDebug "[setListNgrams]" (listId, ngramsType)
  storyEnv <- view hasNodeStory
  storyVar <- liftBase $ (storyEnv ^. nse_getter) [listId]
  liftBase $ modifyMVar_ var' (pure . overwrite) `seqVar` storyVar
  saveNodeStory
  where
    -- Replace the whole table of this ngrams type with the given map.
    overwrite story =
      story & unNodeStory . at listId . _Just . a_state . at ngramsType .~ Just ns
    -- Tiny helpers so the MVar update reads as a single statement.
    var' = identity
    seqVar f v = f v
259
260
-- | Version currently stored in the node story for the given list.
currentVersion :: HasNodeStory env err m
               => ListId -> m Version
currentVersion listId =
  view (unNodeStory . at listId . _Just . a_version) <$> getRepo' [listId]
266
267
-- | Collect, as 'Ngrams', every term of the state patch whose patch has a
-- @patch_new@ value (one result per such value).
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
  [ text2ngrams (unNgramsTerm term)
  | (term, termPatch) <- p ^.. _PatchMap
                            -- . each . _PatchMap
                             . each . _NgramsTablePatch
                             . _PatchMap . ifolded . withIndex
  , _                 <- termPatch ^.. patch_new . _Just
  ]
277
278
279
280
-- | Commit a client patch against the archive of a list.
--
-- The client patch @p@ (made against version @p_version@) is transformed
-- against the history entries the client has not seen yet. The transformed
-- patch is applied to the state, pushed on the history, and the version is
-- incremented. The returned 'Versioned' carries the new version together
-- with the patch the client has to apply on its side.
--
-- After the in-memory update the node story is saved and the ngrams
-- reported by 'newNgramsFromNgramsStatePatch' are inserted.
commitStatePatch :: (HasNodeStory env err m, HasMail env)
                 => ListId
                 ->    Versioned NgramsStatePatch'
                 -> m (Versioned NgramsStatePatch')
commitStatePatch listId (Versioned p_version p) = do
  printDebug "[commitStatePatch]" listId
  storyVar <- getNodeStoryVar [listId]
  answer <- liftBase $ modifyMVar storyVar $ \story -> do
    let
      archive  = story ^. unNodeStory . at listId . _Just
      -- History entries added since the version the client knows about.
      unseen   = take (archive ^. a_version - p_version) (archive ^. a_history)
      (p', q') = transformWith ngramsStatePatchConflictResolution p (mconcat unseen)
      archive' = archive & a_version +~ 1
                         & a_state   %~ act p'
                         & a_history %~ (p' :)

    {-
    -- Ideally we would like to check these properties. However:
    -- * They should be checked only to debug the code. The client data
    --   should be able to trigger these.
    -- * What kind of error should they throw (we are in IO here)?
    -- * Should we keep modifyMVar?
    -- * Should we throw the validation in an Exception, catch it around
    --   modifyMVar and throw it back as an Error?
    assertValid $ transformable p q
    assertValid $ applicable p' (r ^. r_state)
    -}
    printDebug "[commitStatePatch] a version" (archive ^. a_version)
    printDebug "[commitStatePatch] a' version" (archive' ^. a_version)
    pure ( story & unNodeStory . at listId .~ Just archive'
         , Versioned (archive' ^. a_version) q'
         )
  saveNodeStory
  -- Save new ngrams
  _ <- insertNgrams (newNgramsFromNgramsStatePatch p)

  pure answer
318
319
320
-- | Fetch what a client at version @p_version@ is missing for one ngrams type.
--
-- This is a special case of 'tableNgramsPut' where the input patch is empty:
-- the history entries newer than @p_version@ are merged and the part
-- concerning @ngramsType@ is returned with the current version.
tableNgramsPull :: HasNodeStory env err m
                => ListId
                -> TableNgrams.NgramsType
                -> Version
                -> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
  printDebug "[tableNgramsPull]" (listId, ngramsType)
  story <- liftBase . readMVar =<< getNodeStoryVar [listId]

  let
    archive    = story ^. unNodeStory . at listId . _Just
    unseen     = take (archive ^. a_version - p_version) (archive ^. a_history)
    tablePatch = mconcat unseen ^. _PatchMap . at ngramsType . _Just

  pure (Versioned (archive ^. a_version) tablePatch)
338
339
340
341
-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-- | Apply the given patch to the stored list and return the patch to be
-- applied on the client.
--
-- An empty patch is answered with 'tableNgramsPull'. Otherwise the patch is
-- wrapped for its ngrams type, validated with 'assertValid' and committed
-- with 'commitStatePatch'.
-- TODO-ACCESS check
tableNgramsPut :: ( HasNodeStory    env err m
                  , HasInvalidError     err
                  , HasSettings     env
                  , HasMail         env
                  )
               => TabType
               -> ListId
               -> Versioned NgramsTablePatch
               -> m (Versioned NgramsTablePatch)
tableNgramsPut tabType listId (Versioned p_version p_table) =
  if p_table == mempty
    then do
      printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
      tableNgramsPull listId ngramsType p_version
    else do
      printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
      let (p, p_validity) = PM.singleton ngramsType p_table

      assertValid p_validity

      -- Keep only the part of the answer that concerns this ngrams type.
      commitStatePatch listId (Versioned p_version p)
        <&> v_data %~ view (_PatchMap . at ngramsType . _Just)
  where
    ngramsType = ngramsTypeFromTabType tabType
372
373
374
-- | Asynchronous job refreshing the charts attached to an ngrams table.
--
-- The list node named in the request is loaded; its parent is the node whose
-- charts are updated. Which charts are refreshed depends on the tab type:
--
-- * 'Authors' and 'Sources': the pie chart;
-- * 'Institutes': the tree, once per list type (stop, candidate, map);
-- * 'Terms': nothing for now (the updates are commented out below).
--
-- When the list has no parent, or for any other tab type, a failed job log
-- is returned. Progress is reported through @logStatus@.
tableNgramsPostChartsAsync :: ( HasNodeStory env err m
                              , FlowCmdM     env err m
                              , HasNodeError err
                              , HasSettings env
                              )
                            => UpdateTableNgramsCharts
                            -> (JobLog -> m ())
                            -> m JobLog
tableNgramsPostChartsAsync utn logStatus = do
  let tabType = utn ^. utn_tab_type
      listId  = utn ^. utn_list_id

  node <- getNode listId
  let nId  = node ^. node_id
      _uId = node ^. node_user_id
      mCId = node ^. node_parent_id

  -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
  -- printDebug "[tableNgramsPostChartsAsync] listId" listId

  case mCId of
    Nothing -> do
      printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
      pure $ jobLogFail $ jobLogInit 1
    Just cId ->
      case tabType of
        Authors -> do
          -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
          (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
          logRef
          _ <- Metrics.updatePie cId (Just listId) tabType Nothing
          logRefSuccess

          getRef
        Institutes -> do
          -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
          -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
          (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
          logRef
          _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
          -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
          logRefSuccess
          _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
          -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
          logRefSuccess
          _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
          logRefSuccess

          getRef
        Sources -> do
          -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
          (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
          logRef
          _ <- Metrics.updatePie cId (Just listId) tabType Nothing
          logRefSuccess

          getRef
        Terms -> do
          -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
          -- Only one step is reported below while the six updates are
          -- commented out, so the job log is initialised with 1 step.
          -- Restore 6 here when re-enabling the block.
          (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
          logRef
          {-
          _ <- Metrics.updateChart cId (Just listId) tabType Nothing
          logRefSuccess
          _ <- Metrics.updatePie cId (Just listId) tabType Nothing
          logRefSuccess
          _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
          logRefSuccess
          _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
          logRefSuccess
          _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
          logRefSuccess
          _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
          -}
          logRefSuccess

          getRef
        _ -> do
          printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
          pure $ jobLogFail $ jobLogInit 1
455
456 {-
457 { _ne_list :: ListType
458 If we merge the parents/children we can potentially create cycles!
459 , _ne_parent :: Maybe NgramsTerm
460 , _ne_children :: MSet NgramsTerm
461 }
462 -}
463
-- | Read, from the node story, the ngrams table of the given type for a
-- node, together with the version stored for that node.
getNgramsTableMap :: HasNodeStory env err m
                  => NodeId
                  -> TableNgrams.NgramsType
                  -> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
  story <- liftBase . readMVar =<< getNodeStoryVar [nodeId]
  let
    version  = story ^. unNodeStory . at nodeId . _Just . a_version
    tableMap = story ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just
  pure (Versioned version tableMap)
473
474
-- | Write the versioned ngrams table of a node, encoded as JSON text, to the
-- file at @fpath@.
dumpJsonTableMap :: HasNodeStory env err m
                 => Text
                 -> NodeId
                 -> TableNgrams.NgramsType
                 -> m ()
dumpJsonTableMap fpath nodeId ngramsType =
  getNgramsTableMap nodeId ngramsType
    >>= liftBase . DTL.writeFile (unpack fpath) . DAT.encodeToLazyText
484
485
-- | Lower bound on the size of a term (compared against @ne_size@ in
-- 'getTableNgrams').
type MinSize = Int

-- | Upper bound on the size of a term (compared against @ne_size@ in
-- 'getTableNgrams').
type MaxSize = Int
488
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
--
-- The table of the list is read from the node story, turned into
-- 'NgramsElement's, optionally sorted, filtered (term size bounds, list type
-- and the @searchQuery@ predicate) and paginated with @limit_@ / @offset@.
-- Each selected element is replaced by its root, and the elements whose root
-- is one of the selected roots are appended. The result also carries the
-- number of elements counted by @filteredNodes@.
--
-- Occurrences are fetched before sorting when ordering by score, otherwise
-- only for the returned page. Timings are printed on stderr.
--
-- TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
-- TODO: should take only one ListId
getTableNgrams :: forall env err m.
                  (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
               => NodeType -> NodeId -> TabType
               -> ListId   -> Limit  -> Maybe Offset
               -> Maybe ListType
               -> Maybe MinSize -> Maybe MaxSize
               -> Maybe OrderBy
               -> (NgramsTerm -> Bool)
               -> m (VersionedWithCount NgramsTable)
getTableNgrams _nType nId tabType listId limit_ offset
               listType minSize maxSize orderBy searchQuery = do

  t0 <- getTime
  -- lIds <- selectNodesWithUsername NodeList userMaster
  let
    ngramsType = ngramsTypeFromTabType tabType
    offset'    = fromMaybe 0 offset
    -- A missing criterion accepts everything.
    listType'  = maybe (const True) (==) listType
    minSize'   = maybe (const True) (<=) minSize
    maxSize'   = maybe (const True) (>=) maxSize

    selected_node n =  minSize'    s
                    && maxSize'    s
                    && searchQuery (n ^. ne_ngrams)
                    && listType'   (n ^. ne_list)
      where
        s = n ^. ne_size

    selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)

    ---------------------------------------
    sortOnOrder Nothing          = identity
    sortOnOrder (Just TermAsc)   = List.sortOn $ view ne_ngrams
    sortOnOrder (Just TermDesc)  = List.sortOn $ Down . view ne_ngrams
    sortOnOrder (Just ScoreAsc)  = List.sortOn $ view ne_occurrences
    sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences

    ---------------------------------------
    -- Replace an element by its root; an element without root is kept.
    rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
    rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
                                                   (tableMap ^. at r)
                                  )
                                  (ne ^. ne_root)

    ---------------------------------------
    -- NOTE(review): here the filter runs after mapping to roots, whereas
    -- selectAndPaginate filters first; confirm this difference is intended.
    filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
    filteredNodes tableMap = rootOf tableMap <$> list & filter selected_node
      where
        list = tableMap ^.. each

    ---------------------------------------
    selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
    selectAndPaginate tableMap = roots <> inners
      where
        list           = tableMap ^.. each
        selected_nodes = list & take limit_
                              . drop offset'
                              . filter selected_node
                              . sortOnOrder orderBy
        roots          = rootOf tableMap <$> selected_nodes
        rootsSet       = Set.fromList (_ne_ngrams <$> roots)
        inners         = list & filter (selected_inner rootsSet)

    ---------------------------------------
    -- Fill in the occurrences of every element when the flag is True.
    setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
    setScores False table = pure table
    setScores True  table = do
      let ngrams_terms = table ^.. each . ne_ngrams
      t1 <- getTime
      occurrences <- getOccByNgramsOnlyFast' nId
                                             listId
                                             ngramsType
                                             ngrams_terms
      t2 <- getTime
      liftBase $ hprint stderr
        ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
        (length ngrams_terms) t1 t2
      {-
      occurrences <- getOccByNgramsOnlySlow nType nId
                                            (lIds <> [listId])
                                            ngramsType
                                            ngrams_terms
      -}
      let
        setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences

      pure $ table & each %~ setOcc
    ---------------------------------------

  -- lists <- catMaybes <$> listsWith userMaster
  -- trace (show lists) $
  -- getNgramsTableMap ({-lists <>-} listIds) ngramsType


  let scoresNeeded = needsScores orderBy
  tableMap1 <- getNgramsTableMap listId ngramsType
  t1 <- getTime
  tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
                                    . Map.mapWithKey ngramsElementFromRepo

  -- Only the number of filtered elements is needed here, so occurrences are
  -- not fetched for them: the count does not depend on the scores.
  let fltrCount = length $ filteredNodes (tableMap2 ^. v_data)

  t2 <- getTime
  tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
                                    . setScores (not scoresNeeded)
                                    . selectAndPaginate
  t3 <- getTime
  liftBase $ hprint stderr
            ("getTableNgrams total=" % hasTime
                          % " map1=" % hasTime
                          % " map2=" % hasTime
                          % " map3=" % hasTime
                          % " sql="  % (if scoresNeeded then "map2" else "map3")
                          % "\n"
            ) t0 t3 t0 t1 t1 t2 t2 t3
  pure $ toVersionedWithCount fltrCount tableMap3
615
616
617
-- | Recompute the occurrences of every ngram of a list table.
--
-- The table is read, converted to 'NgramsElement's and the occurrences are
-- fetched with 'getOccByNgramsOnlyFast''. The rescored table is discarded
-- and the function always returns @1@.
-- NOTE(review): nothing visible here stores the recomputed scores — confirm
-- that the fetch itself is the intended effect.
scoresRecomputeTableNgrams :: forall env err m.
  (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
  => NodeId -> TabType -> ListId -> m Int
scoresRecomputeTableNgrams nId tabType listId = do
  tableMap <- getNgramsTableMap listId ngramsType
  _rescored <- tableMap & v_data %%~ setScores
                                   . Map.mapWithKey ngramsElementFromRepo

  pure 1
  where
    ngramsType = ngramsTypeFromTabType tabType

    -- Set the occurrences of each element from the fetched counts.
    setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
    setScores table = do
      let ngrams_terms = table ^.. each . ne_ngrams
      occurrences <- getOccByNgramsOnlyFast' nId
                                             listId
                                             ngramsType
                                             ngrams_terms
      let
        withOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences

      pure $ table & each %~ withOcc
641
642
643
644
645 -- APIs
646
647 -- TODO: find a better place for the code above, All APIs stay here
648
-- | Sort order of the ngrams table: by term or by score (occurrences),
-- ascending or descending.
data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
  deriving (Generic, Enum, Bounded, Read, Show)

-- | Parsed from the exact constructor name; anything else is rejected.
instance FromHttpApiData OrderBy where
  parseUrlPiece piece = case piece of
    "TermAsc"   -> pure TermAsc
    "TermDesc"  -> pure TermDesc
    "ScoreAsc"  -> pure ScoreAsc
    "ScoreDesc" -> pure ScoreDesc
    _           -> Left "Unexpected value of OrderBy"

-- | Rendered as the constructor name.
instance ToHttpApiData OrderBy where
  toUrlPiece orderBy = pack (show orderBy)

instance ToParamSchema OrderBy
instance FromJSON      OrderBy
instance ToJSON        OrderBy
instance ToSchema      OrderBy

-- | Any of the constructors.
instance Arbitrary OrderBy where
  arbitrary = elements [minBound .. maxBound]
670
-- | Whether the requested ordering is by score ('ScoreAsc' or 'ScoreDesc').
needsScores :: Maybe OrderBy -> Bool
needsScores orderBy = case orderBy of
  Just ScoreAsc  -> True
  Just ScoreDesc -> True
  _              -> False
675
-- | GET endpoint returning a page of the ngrams table.
--
-- @ngramsType@, @list@ and @limit@ use 'QueryParamR'; the other parameters
-- are plain 'QueryParam's.
type TableNgramsApiGet =
     Summary " Table Ngrams API Get"
  :> QueryParamR "ngramsType"  TabType
  :> QueryParamR "list"        ListId
  :> QueryParamR "limit"       Limit
  :> QueryParam  "offset"      Offset
  :> QueryParam  "listType"    ListType
  :> QueryParam  "minTermSize" MinSize
  :> QueryParam  "maxTermSize" MaxSize
  :> QueryParam  "orderBy"     OrderBy
  :> QueryParam  "search"      Text
  :> Get '[JSON] (VersionedWithCount NgramsTable)
687
-- | PUT endpoint taking a versioned table patch in the request body and
-- answering with a versioned table patch.
type TableNgramsApiPut =
     Summary " Table Ngrams API Change"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> ReqBody '[JSON] (Versioned NgramsTablePatch)
  :> Put     '[JSON] (Versioned NgramsTablePatch)
693
-- | POST endpoint (path @recompute@) asking for the scores of a table to be
-- recomputed; it answers with an 'Int'.
type RecomputeScoresNgramsApiGet =
     Summary " Recompute scores for ngrams table"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> "recompute" :> Post '[JSON] Int
698
-- | GET endpoint returning a 'Version'.
type TableNgramsApiGetVersion =
     Summary " Table Ngrams API Get Version"
  :> QueryParamR "ngramsType" TabType
  :> QueryParamR "list"       ListId
  :> Get '[JSON] Version
703
-- | The whole ngrams table API: get, put, score recomputation, version
-- (under the @version@ path) and the asynchronous charts update.
type TableNgramsApi =
       TableNgramsApiGet
  :<|> TableNgramsApiPut
  :<|> RecomputeScoresNgramsApiGet
  :<|> "version" :> TableNgramsApiGetVersion
  :<|> TableNgramsAsyncApi
709
-- | Asynchronous job API served under @async/charts/update@; the job input
-- is an 'UpdateTableNgramsCharts' and both events and output are 'JobLog's.
type TableNgramsAsyncApi =
     Summary "Table Ngrams Async API"
  :> "async"
  :> "charts"
  :> "update"
  :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
715
-- | Ngrams table of a corpus.
--
-- Delegates to 'getTableNgrams' with 'NodeCorpus'. When a search text is
-- given, only terms containing it are kept; without it every term matches.
getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
                     => NodeId
                     -> TabType
                     -> ListId
                     -> Limit
                     -> Maybe Offset
                     -> Maybe ListType
                     -> Maybe MinSize -> Maybe MaxSize
                     -> Maybe OrderBy
                     -> Maybe Text -- full text search
                     -> m (VersionedWithCount NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
  getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy matches
  where
    matches (NgramsTerm nt) = case mt of
      Nothing     -> True
      Just needle -> needle `isInfixOf` nt
731
732
733
-- | Current version of a list; the node id and the tab type are ignored.
getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
                      => NodeId
                      -> TabType
                      -> ListId
                      -> m Version
getTableNgramsVersion _ _ = currentVersion
740
741
742
743 -- TODO: limit?
744 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
745 -- This line above looks like a waste of computation to finally get only the version.
746 -- See the comment about listNgramsChangedSince.
747
748
-- | Ngrams table restricted to the ngrams of one document.
--
-- Text search is deactivated for now for ngrams by doc only: the search
-- text argument is ignored. The ngrams of the document are selected over
-- the 'NodeList' nodes of 'userMaster' plus the given list, and only the
-- terms found among them are kept by 'getTableNgrams'.
getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
                  => DocId -> TabType
                  -> ListId -> Limit -> Maybe Offset
                  -> Maybe ListType
                  -> Maybe MinSize -> Maybe MaxSize
                  -> Maybe OrderBy
                  -> Maybe Text -- full text search
                  -> m (VersionedWithCount NgramsTable)
getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
  ns <- selectNodesWithUsername NodeList userMaster
  let ngramsType = ngramsTypeFromTabType tabType
  ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
  let
    -- Built once and shared by every call of the predicate, instead of
    -- being rebuilt inside it.
    docTerms = S.fromList ngs
    searchQuery (NgramsTerm nt) = nt `S.member` docTerms
  getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
764
765
766
-- | Server of 'TableNgramsApi' for a corpus node: one handler per endpoint,
-- in the order of the API type.
apiNgramsTableCorpus :: GargServerC env err m
                     => NodeId -> ServerT TableNgramsApi m
apiNgramsTableCorpus cId =
       getTableNgramsCorpus       cId
  :<|> tableNgramsPut
  :<|> scoresRecomputeTableNgrams cId
  :<|> getTableNgramsVersion      cId
  :<|> apiNgramsAsync             cId
775
-- | Server of 'TableNgramsApi' for a document node: one handler per
-- endpoint, in the order of the API type.
apiNgramsTableDoc :: GargServerC env err m
                  => DocId -> ServerT TableNgramsApi m
apiNgramsTableDoc dId =
       getTableNgramsDoc          dId
  :<|> tableNgramsPut
  :<|> scoresRecomputeTableNgrams dId
  :<|> getTableNgramsVersion      dId
  :<|> apiNgramsAsync             dId
784
-- | Serve the asynchronous charts-update job; the node id is ignored.
--
-- Every job log is printed with 'printDebug' before being handed to the
-- job's own logger.
apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
apiNgramsAsync _dId =
  serveJobsAPI $
    JobFunction $ \i log ->
      let
        debugThenLog jobLog = do
          printDebug "tableNgramsPostChartsAsync" jobLog
          liftBase $ log jobLog
      in tableNgramsPostChartsAsync i debugThenLog
794
-- | Did the given list of ngrams change since the given version?
--
-- The returned value is a versioned boolean, meaning that one always
-- retrieves the latest version. The boolean is 'True' when there are changes.
-- If the given version is negative then one simply receives the latest
-- version and 'True'.
--
-- Using this function is more precise than simply comparing the latest
-- version number with the local version number. Indeed there might be no
-- change to this particular list and still the version number has changed
-- because of other lists.
--
-- Here the added value is to make a compromise between precision,
-- computation, and bandwidth:
--
-- * currentVersion: good computation, good bandwidth, bad precision.
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince :: HasNodeStory env err m
                       => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
  | version < 0 =
      Versioned <$> currentVersion listId <*> pure True
  | otherwise   =
      -- A non-empty pulled patch means something changed since @version@.
      tableNgramsPull listId ngramsType version & mapped . v_data %~ (/= mempty)
814
815