]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[searx] first draft of searx parsing, updated stack to lts 18.4
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , VersionedWithCount(..)
82 , currentVersion
83 , listNgramsChangedSince
84 )
85 where
86
87 import Control.Concurrent
88 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
89 import Control.Monad.Reader
90 import Data.Aeson hiding ((.=))
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import Data.Map.Strict (Map)
94 import Data.Maybe (fromMaybe)
95 import Data.Monoid
96 import Data.Ord (Down(..))
97 import Data.Patch.Class (Action(act), Transformable(..), ours)
98 import Data.Swagger hiding (version, patch)
99 import Data.Text (Text, isInfixOf, unpack)
100 import Data.Text.Lazy.IO as DTL
101 import Formatting (hprint, int, (%))
102 import GHC.Generics (Generic)
103 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
104 import Gargantext.API.Admin.Types (HasSettings)
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
108 import Gargantext.Core.Utils (something)
109 import Gargantext.Database.Action.Flow.Types
110 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
111 import Gargantext.Database.Admin.Config (userMaster)
112 import Gargantext.Database.Admin.Types.Node (NodeType(..))
113 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
114 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
115 import Gargantext.Database.Query.Table.Node (getNode)
116 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
117 import Gargantext.Database.Query.Table.Node.Select
118 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
119 import Gargantext.Prelude hiding (log)
120 import Gargantext.API.Job
121 import Gargantext.Prelude.Clock (hasTime, getTime)
122 import Prelude (error)
123 import Servant hiding (Patch)
124 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
125 import System.IO (stderr)
126 import Test.QuickCheck (elements)
127 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
128 import qualified Data.Aeson.Text as DAT
129 import qualified Data.List as List
130 import qualified Data.Map.Strict as Map
131 import qualified Data.Map.Strict.Patch as PM
132 import qualified Data.Set as S
133 import qualified Data.Set as Set
134 import qualified Gargantext.API.Metrics as Metrics
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136
137 {-
138 -- TODO sequences of modifications (Patchs)
139 type NgramsIdPatch = Patch NgramsId NgramsPatch
140
141 ngramsPatch :: Int -> NgramsPatch
142 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
143
144 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
145 toEdit n p = Edit n p
146 ngramsIdPatch :: Patch NgramsId NgramsPatch
147 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
148 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
149 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
150 ]
151
152 -- applyPatchBack :: Patch -> IO Patch
153 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
154 -}
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158
159 {-
160 -- TODO: Replace.old is ignored which means that if the current list
161 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
162 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
163 -- However this should not happen in non conflicting situations.
164 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
165 mkListsUpdate nt patches =
166 [ (ngramsTypeId nt, ng, listTypeId lt)
167 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
168 , lt <- patch ^.. patch_list . new
169 ]
170
171 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
172 -> NgramsType
173 -> NgramsTablePatch
174 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
175 mkChildrenGroups addOrRem nt patches =
176 [ (ngramsTypeId nt, parent, child)
177 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
178 , child <- patch ^.. patch_children . to addOrRem . folded
179 ]
180 -}
181
182 ------------------------------------------------------------------------
183
184 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
185 => m ()
186 saveRepo = liftBase =<< view repoSaver
187
188 listTypeConflictResolution :: ListType -> ListType -> ListType
189 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
190
191 ngramsStatePatchConflictResolution
192 :: TableNgrams.NgramsType
193 -> NodeId
194 -> NgramsTerm
195 -> ConflictResolutionNgramsPatch
196 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
197 = (ours, (const ours, ours), (False, False))
198 -- (False, False) mean here that Mod has always priority.
199 -- (True, False) <- would mean priority to the left (same as ours).
200
201 -- undefined {- TODO think this through -}, listTypeConflictResolution)
202
203 -- Current state:
204 -- Insertions are not considered as patches,
205 -- they do not extend history,
206 -- they do not bump version.
207 insertNewOnly :: a -> Maybe b -> a
208 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
209 -- TODO error handling
210
211 {- unused
212 -- TODO refactor with putListNgrams
213 copyListNgrams :: RepoCmdM env err m
214 => NodeId -> NodeId -> NgramsType
215 -> m ()
216 copyListNgrams srcListId dstListId ngramsType = do
217 var <- view repoVar
218 liftBase $ modifyMVar_ var $
219 pure . (r_state . at ngramsType %~ (Just . f . something))
220 saveRepo
221 where
222 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
223 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
224
225 -- TODO refactor with putListNgrams
226 -- The list must be non-empty!
227 -- The added ngrams must be non-existent!
228 addListNgrams :: RepoCmdM env err m
229 => NodeId -> NgramsType
230 -> [NgramsElement] -> m ()
231 addListNgrams listId ngramsType nes = do
232 var <- view repoVar
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
235 saveRepo
236 where
237 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
238 -}
239
240 -- UNSAFE
241 rmListNgrams :: RepoCmdM env err m
242 => ListId
243 -> TableNgrams.NgramsType
244 -> m ()
245 rmListNgrams l nt = setListNgrams l nt mempty
246
247 -- | TODO: incr the Version number
248 -- && should use patch
249 -- UNSAFE
250 setListNgrams :: RepoCmdM env err m
251 => NodeId
252 -> TableNgrams.NgramsType
253 -> Map NgramsTerm NgramsRepoElement
254 -> m ()
255 setListNgrams listId ngramsType ns = do
256 var <- view repoVar
257 liftBase $ modifyMVar_ var $
258 pure . ( r_state
259 . at ngramsType %~
260 (Just .
261 (at listId .~ ( Just ns))
262 . something
263 )
264 )
265 printDebug "List modified" NodeList
266 saveRepo
267
268
269 currentVersion :: RepoCmdM env err m
270 => m Version
271 currentVersion = do
272 var <- view repoVar
273 r <- liftBase $ readMVar var
274 pure $ r ^. r_version
275
276 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
277 newNgramsFromNgramsStatePatch p =
278 [ text2ngrams (unNgramsTerm n)
279 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
280 , _ <- np ^.. patch_new . _Just
281 ]
282
283 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
284 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
285 commitStatePatch (Versioned p_version p) = do
286 var <- view repoVar
287 vq' <- liftBase $ modifyMVar var $ \r -> do
288 let
289 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
290 (p', q') = transformWith ngramsStatePatchConflictResolution p q
291 r' = r & r_version +~ 1
292 & r_state %~ act p'
293 & r_history %~ (p' :)
294 {-
295 -- Ideally we would like to check these properties. However:
296 -- * They should be checked only to debug the code. The client data
297 -- should be able to trigger these.
298 -- * What kind of error should they throw (we are in IO here)?
299 -- * Should we keep modifyMVar?
300 -- * Should we throw the validation in an Exception, catch it around
301 -- modifyMVar and throw it back as an Error?
302 assertValid $ transformable p q
303 assertValid $ applicable p' (r ^. r_state)
304 -}
305 pure (r', Versioned (r' ^. r_version) q')
306
307 saveRepo
308
309 -- Save new ngrams
310 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
311
312 pure vq'
313
314 -- This is a special case of tableNgramsPut where the input patch is empty.
315 tableNgramsPull :: RepoCmdM env err m
316 => ListId
317 -> TableNgrams.NgramsType
318 -> Version
319 -> m (Versioned NgramsTablePatch)
320 tableNgramsPull listId ngramsType p_version = do
321 var <- view repoVar
322 r <- liftBase $ readMVar var
323
324 let
325 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
326 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
327
328 pure (Versioned (r ^. r_version) q_table)
329
330 -- Apply the given patch to the DB and returns the patch to be applied on the
331 -- client.
332 -- TODO-ACCESS check
333 tableNgramsPut :: ( FlowCmdM env err m
334 , HasSettings env
335 )
336 => TabType
337 -> ListId
338 -> Versioned NgramsTablePatch
339 -> m (Versioned NgramsTablePatch)
340 tableNgramsPut tabType listId (Versioned p_version p_table)
341 | p_table == mempty = do
342 let ngramsType = ngramsTypeFromTabType tabType
343 tableNgramsPull listId ngramsType p_version
344
345 | otherwise = do
346 let ngramsType = ngramsTypeFromTabType tabType
347 (p0, p0_validity) = PM.singleton listId p_table
348 (p, p_validity) = PM.singleton ngramsType p0
349
350 assertValid p0_validity
351 assertValid p_validity
352
353 ret <- commitStatePatch (Versioned p_version p)
354 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
355
356 pure ret
357
358
359 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
360 , HasNodeError err
361 , HasSettings env
362 )
363 => UpdateTableNgramsCharts
364 -> (JobLog -> m ())
365 -> m JobLog
366 tableNgramsPostChartsAsync utn logStatus = do
367 let tabType = utn ^. utn_tab_type
368 let listId = utn ^. utn_list_id
369
370 node <- getNode listId
371 let nId = node ^. node_id
372 _uId = node ^. node_user_id
373 mCId = node ^. node_parent_id
374
375 printDebug "[tableNgramsPut] tabType" tabType
376 printDebug "[tableNgramsPut] listId" listId
377
378 case mCId of
379 Nothing -> do
380 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
381 pure $ jobLogFail $ jobLogInit 1
382 Just cId -> do
383 case tabType of
384 Authors -> do
385 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
386 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
387 logRef
388 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
389 logRefSuccess
390
391 getRef
392 Institutes -> do
393 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
394 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
395 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
396 logRef
397 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
398 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
399 logRefSuccess
400 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
401 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
402 logRefSuccess
403 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
404 logRefSuccess
405
406 getRef
407 Sources -> do
408 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
409 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
410 logRef
411 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
412 logRefSuccess
413
414 getRef
415 Terms -> do
416 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
417 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
418 logRef
419 {-
420 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
421 logRefSuccess
422 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
423 logRefSuccess
424 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
425 logRefSuccess
426 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
427 logRefSuccess
428 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
429 logRefSuccess
430 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
431 -}
432 logRefSuccess
433
434 getRef
435 _ -> do
436 printDebug "[tableNgramsPut] no update for tabType = " tabType
437 pure $ jobLogFail $ jobLogInit 1
438
439 {-
440 { _ne_list :: ListType
441 If we merge the parents/children we can potentially create cycles!
442 , _ne_parent :: Maybe NgramsTerm
443 , _ne_children :: MSet NgramsTerm
444 }
445 -}
446
447 getNgramsTableMap :: RepoCmdM env err m
448 => NodeId
449 -> TableNgrams.NgramsType
450 -> m (Versioned NgramsTableMap)
451 getNgramsTableMap nodeId ngramsType = do
452 v <- view repoVar
453 repo <- liftBase $ readMVar v
454 pure $ Versioned (repo ^. r_version)
455 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
456
457 dumpJsonTableMap :: RepoCmdM env err m
458 => Text
459 -> NodeId
460 -> TableNgrams.NgramsType
461 -> m ()
462 dumpJsonTableMap fpath nodeId ngramsType = do
463 m <- getNgramsTableMap nodeId ngramsType
464 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
465 pure ()
466
467 type MinSize = Int
468 type MaxSize = Int
469
470 -- | TODO Errors management
471 -- TODO: polymorphic for Annuaire or Corpus or ...
472 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
473 -- TODO: should take only one ListId
474
475
476 getTableNgrams :: forall env err m.
477 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
478 => NodeType -> NodeId -> TabType
479 -> ListId -> Limit -> Maybe Offset
480 -> Maybe ListType
481 -> Maybe MinSize -> Maybe MaxSize
482 -> Maybe OrderBy
483 -> (NgramsTerm -> Bool)
484 -> m (VersionedWithCount NgramsTable)
485 getTableNgrams _nType nId tabType listId limit_ offset
486 listType minSize maxSize orderBy searchQuery = do
487
488 t0 <- getTime
489 -- lIds <- selectNodesWithUsername NodeList userMaster
490 let
491 ngramsType = ngramsTypeFromTabType tabType
492 offset' = maybe 0 identity offset
493 listType' = maybe (const True) (==) listType
494 minSize' = maybe (const True) (<=) minSize
495 maxSize' = maybe (const True) (>=) maxSize
496
497 selected_node n = minSize' s
498 && maxSize' s
499 && searchQuery (n ^. ne_ngrams)
500 && listType' (n ^. ne_list)
501 where
502 s = n ^. ne_size
503
504 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
505
506 ---------------------------------------
507 sortOnOrder Nothing = identity
508 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
509 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
510 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
511 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
512
513 ---------------------------------------
514
515 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
516 filteredNodes tableMap = rootOf <$> list & filter selected_node
517 where
518 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
519 (tableMap ^. at r)
520 )
521 (ne ^. ne_root)
522 list = tableMap ^.. each
523
524 ---------------------------------------
525 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
526 selectAndPaginate tableMap = roots <> inners
527 where
528 list = tableMap ^.. each
529 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
530 (tableMap ^. at r)
531 )
532 (ne ^. ne_root)
533 selected_nodes = list & take limit_
534 . drop offset'
535 . filter selected_node
536 . sortOnOrder orderBy
537 roots = rootOf <$> selected_nodes
538 rootsSet = Set.fromList (_ne_ngrams <$> roots)
539 inners = list & filter (selected_inner rootsSet)
540
541 ---------------------------------------
542 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
543 setScores False table = pure table
544 setScores True table = do
545 let ngrams_terms = table ^.. each . ne_ngrams
546 t1 <- getTime
547 occurrences <- getOccByNgramsOnlyFast' nId
548 listId
549 ngramsType
550 ngrams_terms
551 t2 <- getTime
552 liftBase $ hprint stderr
553 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
554 (length ngrams_terms) t1 t2
555 {-
556 occurrences <- getOccByNgramsOnlySlow nType nId
557 (lIds <> [listId])
558 ngramsType
559 ngrams_terms
560 -}
561 let
562 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
563
564 pure $ table & each %~ setOcc
565 ---------------------------------------
566
567 -- lists <- catMaybes <$> listsWith userMaster
568 -- trace (show lists) $
569 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
570
571
572 let scoresNeeded = needsScores orderBy
573 tableMap1 <- getNgramsTableMap listId ngramsType
574 t1 <- getTime
575 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
576 . Map.mapWithKey ngramsElementFromRepo
577
578 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
579 . filteredNodes
580 let fltrCount = length $ fltr ^. v_data . _NgramsTable
581
582 t2 <- getTime
583 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
584 . setScores (not scoresNeeded)
585 . selectAndPaginate
586 t3 <- getTime
587 liftBase $ hprint stderr
588 ("getTableNgrams total=" % hasTime
589 % " map1=" % hasTime
590 % " map2=" % hasTime
591 % " map3=" % hasTime
592 % " sql=" % (if scoresNeeded then "map2" else "map3")
593 % "\n"
594 ) t0 t3 t0 t1 t1 t2 t2 t3
595 pure $ toVersionedWithCount fltrCount tableMap3
596
597
598 scoresRecomputeTableNgrams :: forall env err m.
599 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
600 => NodeId -> TabType -> ListId -> m Int
601 scoresRecomputeTableNgrams nId tabType listId = do
602 tableMap <- getNgramsTableMap listId ngramsType
603 _ <- tableMap & v_data %%~ setScores
604 . Map.mapWithKey ngramsElementFromRepo
605
606 pure $ 1
607 where
608 ngramsType = ngramsTypeFromTabType tabType
609
610 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
611 setScores table = do
612 let ngrams_terms = table ^.. each . ne_ngrams
613 occurrences <- getOccByNgramsOnlyFast' nId
614 listId
615 ngramsType
616 ngrams_terms
617 let
618 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
619
620 pure $ table & each %~ setOcc
621
622
623
624 -- APIs
625
626 -- TODO: find a better place for the code above, All APIs stay here
627
628 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
629 deriving (Generic, Enum, Bounded, Read, Show)
630
631 instance FromHttpApiData OrderBy
632 where
633 parseUrlPiece "TermAsc" = pure TermAsc
634 parseUrlPiece "TermDesc" = pure TermDesc
635 parseUrlPiece "ScoreAsc" = pure ScoreAsc
636 parseUrlPiece "ScoreDesc" = pure ScoreDesc
637 parseUrlPiece _ = Left "Unexpected value of OrderBy"
638
639
640 instance ToParamSchema OrderBy
641 instance FromJSON OrderBy
642 instance ToJSON OrderBy
643 instance ToSchema OrderBy
644 instance Arbitrary OrderBy
645 where
646 arbitrary = elements [minBound..maxBound]
647
648 needsScores :: Maybe OrderBy -> Bool
649 needsScores (Just ScoreAsc) = True
650 needsScores (Just ScoreDesc) = True
651 needsScores _ = False
652
653 type TableNgramsApiGet = Summary " Table Ngrams API Get"
654 :> QueryParamR "ngramsType" TabType
655 :> QueryParamR "list" ListId
656 :> QueryParamR "limit" Limit
657 :> QueryParam "offset" Offset
658 :> QueryParam "listType" ListType
659 :> QueryParam "minTermSize" MinSize
660 :> QueryParam "maxTermSize" MaxSize
661 :> QueryParam "orderBy" OrderBy
662 :> QueryParam "search" Text
663 :> Get '[JSON] (VersionedWithCount NgramsTable)
664
665 type TableNgramsApiPut = Summary " Table Ngrams API Change"
666 :> QueryParamR "ngramsType" TabType
667 :> QueryParamR "list" ListId
668 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
669 :> Put '[JSON] (Versioned NgramsTablePatch)
670
671 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
672 :> QueryParamR "ngramsType" TabType
673 :> QueryParamR "list" ListId
674 :> "recompute" :> Post '[JSON] Int
675
676 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
677 :> QueryParamR "ngramsType" TabType
678 :> QueryParamR "list" ListId
679 :> Get '[JSON] Version
680
681 type TableNgramsApi = TableNgramsApiGet
682 :<|> TableNgramsApiPut
683 :<|> RecomputeScoresNgramsApiGet
684 :<|> "version" :> TableNgramsApiGetVersion
685 :<|> TableNgramsAsyncApi
686
687 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
688 :> "async"
689 :> "charts"
690 :> "update"
691 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
692
693 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
694 => NodeId
695 -> TabType
696 -> ListId
697 -> Limit
698 -> Maybe Offset
699 -> Maybe ListType
700 -> Maybe MinSize -> Maybe MaxSize
701 -> Maybe OrderBy
702 -> Maybe Text -- full text search
703 -> m (VersionedWithCount NgramsTable)
704 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
705 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
706 where
707 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
708
709 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
710 => NodeId
711 -> TabType
712 -> ListId
713 -> m Version
714 getTableNgramsVersion _nId _tabType _listId = currentVersion
715 -- TODO: limit?
716 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
717 -- This line above looks like a waste of computation to finally get only the version.
718 -- See the comment about listNgramsChangedSince.
719
720
721 -- | Text search is deactivated for now for ngrams by doc only
722 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
723 => DocId -> TabType
724 -> ListId -> Limit -> Maybe Offset
725 -> Maybe ListType
726 -> Maybe MinSize -> Maybe MaxSize
727 -> Maybe OrderBy
728 -> Maybe Text -- full text search
729 -> m (VersionedWithCount NgramsTable)
730 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
731 ns <- selectNodesWithUsername NodeList userMaster
732 let ngramsType = ngramsTypeFromTabType tabType
733 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
734 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
735 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
736
737
738
739 apiNgramsTableCorpus :: ( GargServerC env err m
740 )
741 => NodeId -> ServerT TableNgramsApi m
742 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
743 :<|> tableNgramsPut
744 :<|> scoresRecomputeTableNgrams cId
745 :<|> getTableNgramsVersion cId
746 :<|> apiNgramsAsync cId
747
748 apiNgramsTableDoc :: ( GargServerC env err m
749 )
750 => DocId -> ServerT TableNgramsApi m
751 apiNgramsTableDoc dId = getTableNgramsDoc dId
752 :<|> tableNgramsPut
753 :<|> scoresRecomputeTableNgrams dId
754 :<|> getTableNgramsVersion dId
755 :<|> apiNgramsAsync dId
756 -- > index all the corpus accordingly (TODO AD)
757
758 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
759 apiNgramsAsync _dId =
760 serveJobsAPI $
761 JobFunction $ \i log ->
762 let
763 log' x = do
764 printDebug "tableNgramsPostChartsAsync" x
765 liftBase $ log x
766 in tableNgramsPostChartsAsync i log'
767
768 -- Did the given list of ngrams changed since the given version?
769 -- The returned value is versioned boolean value, meaning that one always retrieve the
770 -- latest version.
771 -- If the given version is negative then one simply receive the latest version and True.
772 -- Using this function is more precise than simply comparing the latest version number
773 -- with the local version number. Indeed there might be no change to this particular list
774 -- and still the version number has changed because of other lists.
775 --
776 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
777 -- * currentVersion: good computation, good bandwidth, bad precision.
778 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
779 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
780 listNgramsChangedSince :: RepoCmdM env err m
781 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
782 listNgramsChangedSince listId ngramsType version
783 | version < 0 =
784 Versioned <$> currentVersion <*> pure True
785 | otherwise =
786 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
787
788
789
790