]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[searx] some more searx parsing work
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsTablePatch
37 , NgramsTableMap
38
39 , NgramsTerm(..)
40
41 , NgramsElement(..)
42 , mkNgramsElement
43
44 , RootParent(..)
45
46 , MSet
47 , mSetFromList
48 , mSetToList
49
50 , Repo(..)
51 , r_version
52 , r_state
53 , r_history
54 , NgramsRepoElement(..)
55 , saveNodeStory
56 , initRepo
57
58 , TabType(..)
59
60 , QueryParamR
61 , TODO
62
63 -- Internals
64 , getNgramsTableMap
65 , dumpJsonTableMap
66 , tableNgramsPull
67 , tableNgramsPut
68
69 , Version
70 , Versioned(..)
71 , VersionedWithCount(..)
72 , currentVersion
73 , listNgramsChangedSince
74 )
75 where
76
77 import Control.Concurrent
78 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
79 import Control.Monad.Reader
80 import Data.Aeson hiding ((.=))
81 import Data.Either (Either(..))
82 import Data.Foldable
83 import Data.Map.Strict (Map)
84 import Data.Maybe (fromMaybe)
85 import Data.Monoid
86 import Data.Ord (Down(..))
87 import Data.Patch.Class (Action(act), Transformable(..), ours)
88 import Data.Swagger hiding (version, patch)
89 import Data.Text (Text, isInfixOf, unpack)
90 import Data.Text.Lazy.IO as DTL
91 import Formatting (hprint, int, (%))
92 import GHC.Generics (Generic)
93 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
94 import Gargantext.API.Admin.Types (HasSettings)
95 import Gargantext.API.Job
96 import Gargantext.API.Ngrams.Types
97 import Gargantext.API.Prelude
98 import Gargantext.Core.NodeStory
99 import Gargantext.Core.Mail.Types (HasMail)
100 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
101 import Gargantext.API.Ngrams.Tools
102 import Gargantext.Database.Action.Flow.Types
103 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
104 import Gargantext.Database.Admin.Config (userMaster)
105 import Gargantext.Database.Admin.Types.Node (NodeType(..))
106 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
107 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
108 import Gargantext.Database.Query.Table.Node (getNode)
109 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
110 import Gargantext.Database.Query.Table.Node.Select
111 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
112 import Gargantext.Prelude hiding (log)
113 import Gargantext.Prelude.Clock (hasTime, getTime)
114 import Prelude (error)
115 import Servant hiding (Patch)
116 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
117 import System.IO (stderr)
118 import Test.QuickCheck (elements)
119 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
120 import qualified Data.Aeson.Text as DAT
121 import qualified Data.List as List
122 import qualified Data.Map.Strict as Map
123 import qualified Data.Map.Strict.Patch as PM
124 import qualified Data.Set as S
125 import qualified Data.Set as Set
126 import qualified Gargantext.API.Metrics as Metrics
127 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
128
129 {-
130 -- TODO sequences of modifications (Patchs)
131 type NgramsIdPatch = Patch NgramsId NgramsPatch
132
133 ngramsPatch :: Int -> NgramsPatch
134 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
135
136 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
137 toEdit n p = Edit n p
138 ngramsIdPatch :: Patch NgramsId NgramsPatch
139 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
140 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
141 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
142 ]
143
144 -- applyPatchBack :: Patch -> IO Patch
145 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
146 -}
147 ------------------------------------------------------------------------
148 ------------------------------------------------------------------------
149 ------------------------------------------------------------------------
150
151 {-
152 -- TODO: Replace.old is ignored which means that if the current list
153 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
154 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
155 -- However this should not happen in non conflicting situations.
156 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
157 mkListsUpdate nt patches =
158 [ (ngramsTypeId nt, ng, listTypeId lt)
159 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
160 , lt <- patch ^.. patch_list . new
161 ]
162
163 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
164 -> NgramsType
165 -> NgramsTablePatch
166 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
167 mkChildrenGroups addOrRem nt patches =
168 [ (ngramsTypeId nt, parent, child)
169 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
170 , child <- patch ^.. patch_children . to addOrRem . folded
171 ]
172 -}
173
174 ------------------------------------------------------------------------
175
176 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
177 => m ()
178 saveNodeStory = liftBase =<< view hasNodeStorySaver
179
180
181 listTypeConflictResolution :: ListType -> ListType -> ListType
182 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
183
184
185 ngramsStatePatchConflictResolution
186 :: TableNgrams.NgramsType
187 -> NgramsTerm
188 -> ConflictResolutionNgramsPatch
189 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
190 = (ours, (const ours, ours), (False, False))
191 -- (False, False) mean here that Mod has always priority.
192 -- (True, False) <- would mean priority to the left (same as ours).
193 -- undefined {- TODO think this through -}, listTypeConflictResolution)
194
195
196
197
198 -- Current state:
199 -- Insertions are not considered as patches,
200 -- they do not extend history,
201 -- they do not bump version.
202 insertNewOnly :: a -> Maybe b -> a
203 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
204 -- TODO error handling
205
206 {- unused
207 -- TODO refactor with putListNgrams
208 copyListNgrams :: RepoCmdM env err m
209 => NodeId -> NodeId -> NgramsType
210 -> m ()
211 copyListNgrams srcListId dstListId ngramsType = do
212 var <- view repoVar
213 liftBase $ modifyMVar_ var $
214 pure . (r_state . at ngramsType %~ (Just . f . something))
215 saveNodeStory
216 where
217 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
218 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
219
220 -- TODO refactor with putListNgrams
221 -- The list must be non-empty!
222 -- The added ngrams must be non-existent!
223 addListNgrams :: RepoCmdM env err m
224 => NodeId -> NgramsType
225 -> [NgramsElement] -> m ()
226 addListNgrams listId ngramsType nes = do
227 var <- view repoVar
228 liftBase $ modifyMVar_ var $
229 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
230 saveNodeStory
231 where
232 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
233 -}
234
235 -- | TODO: incr the Version number
236 -- && should use patch
237 -- UNSAFE
238
239 setListNgrams :: HasNodeStory env err m
240 => NodeId
241 -> TableNgrams.NgramsType
242 -> Map NgramsTerm NgramsRepoElement
243 -> m ()
244 setListNgrams listId ngramsType ns = do
245 printDebug "[setListNgrams]" (listId, ngramsType)
246 getter <- view hasNodeStory
247 var <- liftBase $ (getter ^. nse_getter) [listId]
248 liftBase $ modifyMVar_ var $
249 pure . ( unNodeStory
250 . at listId . _Just
251 . a_state
252 . at ngramsType
253 .~ Just ns
254 )
255 saveNodeStory
256
257
258 currentVersion :: HasNodeStory env err m
259 => ListId -> m Version
260 currentVersion listId = do
261 nls <- getRepo' [listId]
262 pure $ nls ^. unNodeStory . at listId . _Just . a_version
263
264
265 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
266 newNgramsFromNgramsStatePatch p =
267 [ text2ngrams (unNgramsTerm n)
268 | (n,np) <- p ^.. _PatchMap
269 -- . each . _PatchMap
270 . each . _NgramsTablePatch
271 . _PatchMap . ifolded . withIndex
272 , _ <- np ^.. patch_new . _Just
273 ]
274
275
276
277
278 commitStatePatch :: (HasNodeStory env err m, HasMail env)
279 => ListId
280 -> Versioned NgramsStatePatch'
281 -> m (Versioned NgramsStatePatch')
282 commitStatePatch listId (Versioned p_version p) = do
283 printDebug "[commitStatePatch]" listId
284 var <- getNodeStoryVar [listId]
285 vq' <- liftBase $ modifyMVar var $ \ns -> do
286 let
287 a = ns ^. unNodeStory . at listId . _Just
288 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
289 (p', q') = transformWith ngramsStatePatchConflictResolution p q
290 a' = a & a_version +~ 1
291 & a_state %~ act p'
292 & a_history %~ (p' :)
293
294 {-
295 -- Ideally we would like to check these properties. However:
296 -- * They should be checked only to debug the code. The client data
297 -- should be able to trigger these.
298 -- * What kind of error should they throw (we are in IO here)?
299 -- * Should we keep modifyMVar?
300 -- * Should we throw the validation in an Exception, catch it around
301 -- modifyMVar and throw it back as an Error?
302 assertValid $ transformable p q
303 assertValid $ applicable p' (r ^. r_state)
304 -}
305 printDebug "[commitStatePatch] a version" (a ^. a_version)
306 printDebug "[commitStatePatch] a' version" (a' ^. a_version)
307 pure ( ns & unNodeStory . at listId .~ (Just a')
308 , Versioned (a' ^. a_version) q'
309 )
310 saveNodeStory
311 -- Save new ngrams
312 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
313
314 pure vq'
315
316
317
318 -- This is a special case of tableNgramsPut where the input patch is empty.
319 tableNgramsPull :: HasNodeStory env err m
320 => ListId
321 -> TableNgrams.NgramsType
322 -> Version
323 -> m (Versioned NgramsTablePatch)
324 tableNgramsPull listId ngramsType p_version = do
325 printDebug "[tableNgramsPull]" (listId, ngramsType)
326 var <- getNodeStoryVar [listId]
327 r <- liftBase $ readMVar var
328
329 let
330 a = r ^. unNodeStory . at listId . _Just
331 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
332 q_table = q ^. _PatchMap . at ngramsType . _Just
333
334 pure (Versioned (a ^. a_version) q_table)
335
336
337
338
339 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
340 -- Apply the given patch to the DB and returns the patch to be applied on the
341 -- client.
342 -- TODO-ACCESS check
343 tableNgramsPut :: ( HasNodeStory env err m
344 , HasInvalidError err
345 , HasSettings env
346 , HasMail env
347 )
348 => TabType
349 -> ListId
350 -> Versioned NgramsTablePatch
351 -> m (Versioned NgramsTablePatch)
352 tableNgramsPut tabType listId (Versioned p_version p_table)
353 | p_table == mempty = do
354 printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
355 let ngramsType = ngramsTypeFromTabType tabType
356 tableNgramsPull listId ngramsType p_version
357
358 | otherwise = do
359 printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
360 let ngramsType = ngramsTypeFromTabType tabType
361 (p, p_validity) = PM.singleton ngramsType p_table
362
363 assertValid p_validity
364
365 ret <- commitStatePatch listId (Versioned p_version p)
366 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
367
368 pure ret
369
370
371
372 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
373 , FlowCmdM env err m
374 , HasNodeError err
375 , HasSettings env
376 )
377 => UpdateTableNgramsCharts
378 -> (JobLog -> m ())
379 -> m JobLog
380 tableNgramsPostChartsAsync utn logStatus = do
381 let tabType = utn ^. utn_tab_type
382 let listId = utn ^. utn_list_id
383
384 node <- getNode listId
385 let nId = node ^. node_id
386 _uId = node ^. node_user_id
387 mCId = node ^. node_parent_id
388
389 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
390 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
391
392 case mCId of
393 Nothing -> do
394 printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
395 pure $ jobLogFail $ jobLogInit 1
396 Just cId -> do
397 case tabType of
398 Authors -> do
399 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
400 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
401 logRef
402 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
403 logRefSuccess
404
405 getRef
406 Institutes -> do
407 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
408 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
409 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
410 logRef
411 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
412 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
413 logRefSuccess
414 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
415 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
416 logRefSuccess
417 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
418 logRefSuccess
419
420 getRef
421 Sources -> do
422 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
423 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
424 logRef
425 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
426 logRefSuccess
427
428 getRef
429 Terms -> do
430 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
431 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
432 logRef
433 {-
434 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
435 logRefSuccess
436 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
437 logRefSuccess
438 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
439 logRefSuccess
440 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
441 logRefSuccess
442 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
443 logRefSuccess
444 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
445 -}
446 logRefSuccess
447
448 getRef
449 _ -> do
450 printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
451 pure $ jobLogFail $ jobLogInit 1
452
453 {-
454 { _ne_list :: ListType
455 If we merge the parents/children we can potentially create cycles!
456 , _ne_parent :: Maybe NgramsTerm
457 , _ne_children :: MSet NgramsTerm
458 }
459 -}
460
461 getNgramsTableMap :: HasNodeStory env err m
462 => NodeId
463 -> TableNgrams.NgramsType
464 -> m (Versioned NgramsTableMap)
465 getNgramsTableMap nodeId ngramsType = do
466 v <- getNodeStoryVar [nodeId]
467 repo <- liftBase $ readMVar v
468 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
469 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
470
471
472 dumpJsonTableMap :: HasNodeStory env err m
473 => Text
474 -> NodeId
475 -> TableNgrams.NgramsType
476 -> m ()
477 dumpJsonTableMap fpath nodeId ngramsType = do
478 m <- getNgramsTableMap nodeId ngramsType
479 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
480 pure ()
481
482
483 type MinSize = Int
484 type MaxSize = Int
485
486 -- | TODO Errors management
487 -- TODO: polymorphic for Annuaire or Corpus or ...
488 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
489 -- TODO: should take only one ListId
490
491
492 getTableNgrams :: forall env err m.
493 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
494 => NodeType -> NodeId -> TabType
495 -> ListId -> Limit -> Maybe Offset
496 -> Maybe ListType
497 -> Maybe MinSize -> Maybe MaxSize
498 -> Maybe OrderBy
499 -> (NgramsTerm -> Bool)
500 -> m (VersionedWithCount NgramsTable)
501 getTableNgrams _nType nId tabType listId limit_ offset
502 listType minSize maxSize orderBy searchQuery = do
503
504 t0 <- getTime
505 -- lIds <- selectNodesWithUsername NodeList userMaster
506 let
507 ngramsType = ngramsTypeFromTabType tabType
508 offset' = maybe 0 identity offset
509 listType' = maybe (const True) (==) listType
510 minSize' = maybe (const True) (<=) minSize
511 maxSize' = maybe (const True) (>=) maxSize
512
513 selected_node n = minSize' s
514 && maxSize' s
515 && searchQuery (n ^. ne_ngrams)
516 && listType' (n ^. ne_list)
517 where
518 s = n ^. ne_size
519
520 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
521
522 ---------------------------------------
523 sortOnOrder Nothing = identity
524 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
525 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
526 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
527 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
528
529 ---------------------------------------
530
531 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
532 filteredNodes tableMap = rootOf <$> list & filter selected_node
533 where
534 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
535 (tableMap ^. at r)
536 )
537 (ne ^. ne_root)
538 list = tableMap ^.. each
539
540 ---------------------------------------
541 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
542 selectAndPaginate tableMap = roots <> inners
543 where
544 list = tableMap ^.. each
545 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
546 (tableMap ^. at r)
547 )
548 (ne ^. ne_root)
549 selected_nodes = list & take limit_
550 . drop offset'
551 . filter selected_node
552 . sortOnOrder orderBy
553 roots = rootOf <$> selected_nodes
554 rootsSet = Set.fromList (_ne_ngrams <$> roots)
555 inners = list & filter (selected_inner rootsSet)
556
557 ---------------------------------------
558 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
559 setScores False table = pure table
560 setScores True table = do
561 let ngrams_terms = table ^.. each . ne_ngrams
562 t1 <- getTime
563 occurrences <- getOccByNgramsOnlyFast' nId
564 listId
565 ngramsType
566 ngrams_terms
567 t2 <- getTime
568 liftBase $ hprint stderr
569 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
570 (length ngrams_terms) t1 t2
571 {-
572 occurrences <- getOccByNgramsOnlySlow nType nId
573 (lIds <> [listId])
574 ngramsType
575 ngrams_terms
576 -}
577 let
578 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
579
580 pure $ table & each %~ setOcc
581 ---------------------------------------
582
583 -- lists <- catMaybes <$> listsWith userMaster
584 -- trace (show lists) $
585 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
586
587
588 let scoresNeeded = needsScores orderBy
589 tableMap1 <- getNgramsTableMap listId ngramsType
590 t1 <- getTime
591 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
592 . Map.mapWithKey ngramsElementFromRepo
593
594 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
595 . filteredNodes
596 let fltrCount = length $ fltr ^. v_data . _NgramsTable
597
598 t2 <- getTime
599 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
600 . setScores (not scoresNeeded)
601 . selectAndPaginate
602 t3 <- getTime
603 liftBase $ hprint stderr
604 ("getTableNgrams total=" % hasTime
605 % " map1=" % hasTime
606 % " map2=" % hasTime
607 % " map3=" % hasTime
608 % " sql=" % (if scoresNeeded then "map2" else "map3")
609 % "\n"
610 ) t0 t3 t0 t1 t1 t2 t2 t3
611 pure $ toVersionedWithCount fltrCount tableMap3
612
613
614
615 scoresRecomputeTableNgrams :: forall env err m.
616 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
617 => NodeId -> TabType -> ListId -> m Int
618 scoresRecomputeTableNgrams nId tabType listId = do
619 tableMap <- getNgramsTableMap listId ngramsType
620 _ <- tableMap & v_data %%~ setScores
621 . Map.mapWithKey ngramsElementFromRepo
622
623 pure $ 1
624 where
625 ngramsType = ngramsTypeFromTabType tabType
626
627 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
628 setScores table = do
629 let ngrams_terms = table ^.. each . ne_ngrams
630 occurrences <- getOccByNgramsOnlyFast' nId
631 listId
632 ngramsType
633 ngrams_terms
634 let
635 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
636
637 pure $ table & each %~ setOcc
638
639
640
641
642 -- APIs
643
644 -- TODO: find a better place for the code above, All APIs stay here
645
646 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
647 deriving (Generic, Enum, Bounded, Read, Show)
648
649 instance FromHttpApiData OrderBy
650 where
651 parseUrlPiece "TermAsc" = pure TermAsc
652 parseUrlPiece "TermDesc" = pure TermDesc
653 parseUrlPiece "ScoreAsc" = pure ScoreAsc
654 parseUrlPiece "ScoreDesc" = pure ScoreDesc
655 parseUrlPiece _ = Left "Unexpected value of OrderBy"
656
657
658 instance ToParamSchema OrderBy
659 instance FromJSON OrderBy
660 instance ToJSON OrderBy
661 instance ToSchema OrderBy
662 instance Arbitrary OrderBy
663 where
664 arbitrary = elements [minBound..maxBound]
665
666 needsScores :: Maybe OrderBy -> Bool
667 needsScores (Just ScoreAsc) = True
668 needsScores (Just ScoreDesc) = True
669 needsScores _ = False
670
671 type TableNgramsApiGet = Summary " Table Ngrams API Get"
672 :> QueryParamR "ngramsType" TabType
673 :> QueryParamR "list" ListId
674 :> QueryParamR "limit" Limit
675 :> QueryParam "offset" Offset
676 :> QueryParam "listType" ListType
677 :> QueryParam "minTermSize" MinSize
678 :> QueryParam "maxTermSize" MaxSize
679 :> QueryParam "orderBy" OrderBy
680 :> QueryParam "search" Text
681 :> Get '[JSON] (VersionedWithCount NgramsTable)
682
683 type TableNgramsApiPut = Summary " Table Ngrams API Change"
684 :> QueryParamR "ngramsType" TabType
685 :> QueryParamR "list" ListId
686 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
687 :> Put '[JSON] (Versioned NgramsTablePatch)
688
689 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
690 :> QueryParamR "ngramsType" TabType
691 :> QueryParamR "list" ListId
692 :> "recompute" :> Post '[JSON] Int
693
694 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
695 :> QueryParamR "ngramsType" TabType
696 :> QueryParamR "list" ListId
697 :> Get '[JSON] Version
698
699 type TableNgramsApi = TableNgramsApiGet
700 :<|> TableNgramsApiPut
701 :<|> RecomputeScoresNgramsApiGet
702 :<|> "version" :> TableNgramsApiGetVersion
703 :<|> TableNgramsAsyncApi
704
705 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
706 :> "async"
707 :> "charts"
708 :> "update"
709 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
710
711 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
712 => NodeId
713 -> TabType
714 -> ListId
715 -> Limit
716 -> Maybe Offset
717 -> Maybe ListType
718 -> Maybe MinSize -> Maybe MaxSize
719 -> Maybe OrderBy
720 -> Maybe Text -- full text search
721 -> m (VersionedWithCount NgramsTable)
722 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
723 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
724 where
725 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
726
727
728
729 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
730 => NodeId
731 -> TabType
732 -> ListId
733 -> m Version
734 getTableNgramsVersion _nId _tabType listId = currentVersion listId
735
736
737
738 -- TODO: limit?
739 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
740 -- This line above looks like a waste of computation to finally get only the version.
741 -- See the comment about listNgramsChangedSince.
742
743
744 -- | Text search is deactivated for now for ngrams by doc only
745 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
746 => DocId -> TabType
747 -> ListId -> Limit -> Maybe Offset
748 -> Maybe ListType
749 -> Maybe MinSize -> Maybe MaxSize
750 -> Maybe OrderBy
751 -> Maybe Text -- full text search
752 -> m (VersionedWithCount NgramsTable)
753 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
754 ns <- selectNodesWithUsername NodeList userMaster
755 let ngramsType = ngramsTypeFromTabType tabType
756 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
757 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
758 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
759
760
761
762 apiNgramsTableCorpus :: ( GargServerC env err m
763 )
764 => NodeId -> ServerT TableNgramsApi m
765 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
766 :<|> tableNgramsPut
767 :<|> scoresRecomputeTableNgrams cId
768 :<|> getTableNgramsVersion cId
769 :<|> apiNgramsAsync cId
770
771 apiNgramsTableDoc :: ( GargServerC env err m
772 )
773 => DocId -> ServerT TableNgramsApi m
774 apiNgramsTableDoc dId = getTableNgramsDoc dId
775 :<|> tableNgramsPut
776 :<|> scoresRecomputeTableNgrams dId
777 :<|> getTableNgramsVersion dId
778 :<|> apiNgramsAsync dId
779
780 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
781 apiNgramsAsync _dId =
782 serveJobsAPI $
783 JobFunction $ \i log ->
784 let
785 log' x = do
786 printDebug "tableNgramsPostChartsAsync" x
787 liftBase $ log x
788 in tableNgramsPostChartsAsync i log'
789
790 -- Did the given list of ngrams changed since the given version?
791 -- The returned value is versioned boolean value, meaning that one always retrieve the
792 -- latest version.
793 -- If the given version is negative then one simply receive the latest version and True.
794 -- Using this function is more precise than simply comparing the latest version number
795 -- with the local version number. Indeed there might be no change to this particular list
796 -- and still the version number has changed because of other lists.
797 --
798 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
799 -- * currentVersion: good computation, good bandwidth, bad precision.
800 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
801 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
802 listNgramsChangedSince :: HasNodeStory env err m
803 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
804 listNgramsChangedSince listId ngramsType version
805 | version < 0 =
806 Versioned <$> currentVersion listId <*> pure True
807 | otherwise =
808 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
809
810