]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[CLEAN] fix imports / explicit instances / warnings
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , QueryParamR
67 , TODO
68
69 -- Internals
70 , getNgramsTableMap
71 , dumpJsonTableMap
72 , tableNgramsPull
73 , tableNgramsPut
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 )
81 where
82
83 import Control.Concurrent
84 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
85 import Control.Monad.Reader
86 import Data.Aeson hiding ((.=))
87 import Data.Either (Either(..))
88 import Data.Foldable
89 import Data.Map.Strict (Map)
90 import Data.Maybe (fromMaybe)
91 import Data.Monoid
92 import Data.Ord (Down(..))
93 import Data.Patch.Class (Action(act), Transformable(..), ours)
94 import Data.Swagger hiding (version, patch)
95 import Data.Text (Text, isInfixOf, unpack)
96 import Data.Text.Lazy.IO as DTL
97 import Formatting (hprint, int, (%))
98 import GHC.Generics (Generic)
99 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
100 import Gargantext.API.Admin.Types (HasSettings)
101 import Gargantext.API.Job
102 import Gargantext.API.Ngrams.Types
103 import Gargantext.API.Prelude
104 import Gargantext.Core.NodeStory
105 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
106 import Gargantext.API.Ngrams.Tools
107 import Gargantext.Database.Action.Flow.Types
108 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
109 import Gargantext.Database.Admin.Config (userMaster)
110 import Gargantext.Database.Admin.Types.Node (NodeType(..))
111 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
112 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
113 import Gargantext.Database.Query.Table.Node (getNode)
114 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
115 import Gargantext.Database.Query.Table.Node.Select
116 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
117 import Gargantext.Prelude hiding (log)
118 import Gargantext.Prelude.Clock (hasTime, getTime)
119 import Prelude (error)
120 import Servant hiding (Patch)
121 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
122 import System.IO (stderr)
123 import Test.QuickCheck (elements)
124 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
125 import qualified Data.Aeson.Text as DAT
126 import qualified Data.List as List
127 import qualified Data.Map.Strict as Map
128 import qualified Data.Map.Strict.Patch as PM
129 import qualified Data.Set as S
130 import qualified Data.Set as Set
131 import qualified Gargantext.API.Metrics as Metrics
132 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
133
134 {-
135 -- TODO sequences of modifications (Patchs)
136 type NgramsIdPatch = Patch NgramsId NgramsPatch
137
138 ngramsPatch :: Int -> NgramsPatch
139 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
140
141 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
142 toEdit n p = Edit n p
143 ngramsIdPatch :: Patch NgramsId NgramsPatch
144 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
145 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
146 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
147 ]
148
149 -- applyPatchBack :: Patch -> IO Patch
150 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
151 -}
152 ------------------------------------------------------------------------
153 ------------------------------------------------------------------------
154 ------------------------------------------------------------------------
155
156 {-
157 -- TODO: Replace.old is ignored which means that if the current list
158 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
159 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
160 -- However this should not happen in non conflicting situations.
161 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
162 mkListsUpdate nt patches =
163 [ (ngramsTypeId nt, ng, listTypeId lt)
164 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
165 , lt <- patch ^.. patch_list . new
166 ]
167
168 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
169 -> NgramsType
170 -> NgramsTablePatch
171 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
172 mkChildrenGroups addOrRem nt patches =
173 [ (ngramsTypeId nt, parent, child)
174 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
175 , child <- patch ^.. patch_children . to addOrRem . folded
176 ]
177 -}
178
179 ------------------------------------------------------------------------
180
181 saveRepo :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
182 => m ()
183 saveRepo = liftBase =<< view hasNodeStorySaver
184
185
186
187 listTypeConflictResolution :: ListType -> ListType -> ListType
188 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
189
190
191 ngramsStatePatchConflictResolution
192 :: TableNgrams.NgramsType
193 -> NgramsTerm
194 -> ConflictResolutionNgramsPatch
195 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
196 = (ours, (const ours, ours), (False, False))
197 -- (False, False) mean here that Mod has always priority.
198 -- (True, False) <- would mean priority to the left (same as ours).
199 -- undefined {- TODO think this through -}, listTypeConflictResolution)
200
201
202
203
204 -- Current state:
205 -- Insertions are not considered as patches,
206 -- they do not extend history,
207 -- they do not bump version.
208 insertNewOnly :: a -> Maybe b -> a
209 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
210 -- TODO error handling
211
212 {- unused
213 -- TODO refactor with putListNgrams
214 copyListNgrams :: RepoCmdM env err m
215 => NodeId -> NodeId -> NgramsType
216 -> m ()
217 copyListNgrams srcListId dstListId ngramsType = do
218 var <- view repoVar
219 liftBase $ modifyMVar_ var $
220 pure . (r_state . at ngramsType %~ (Just . f . something))
221 saveRepo
222 where
223 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
224 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
225
226 -- TODO refactor with putListNgrams
227 -- The list must be non-empty!
228 -- The added ngrams must be non-existent!
229 addListNgrams :: RepoCmdM env err m
230 => NodeId -> NgramsType
231 -> [NgramsElement] -> m ()
232 addListNgrams listId ngramsType nes = do
233 var <- view repoVar
234 liftBase $ modifyMVar_ var $
235 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
236 saveRepo
237 where
238 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
239 -}
240
241 -- | TODO: incr the Version number
242 -- && should use patch
243 -- UNSAFE
244
245 setListNgrams :: HasNodeStory env err m
246 => NodeId
247 -> TableNgrams.NgramsType
248 -> Map NgramsTerm NgramsRepoElement
249 -> m ()
250 setListNgrams listId ngramsType ns = do
251 getter <- view hasNodeStory
252 var <- liftBase $ (getter ^. nse_getter) listId
253 liftBase $ modifyMVar_ var $
254 pure . ( unNodeStory
255 . at listId . _Just
256 . a_state
257 . at ngramsType
258 .~ Just ns
259 )
260 saveRepo
261
262
263 currentVersion :: HasNodeStory env err m
264 => ListId -> m Version
265 currentVersion listId = do
266 nls <- getRepo' [listId]
267 pure $ nls ^. unNodeStory . at listId . _Just . a_version
268
269
270 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
271 newNgramsFromNgramsStatePatch p =
272 [ text2ngrams (unNgramsTerm n)
273 | (n,np) <- p ^.. _PatchMap
274 -- . each . _PatchMap
275 . each . _NgramsTablePatch
276 . _PatchMap . ifolded . withIndex
277 , _ <- np ^.. patch_new . _Just
278 ]
279
280
281 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
282 commitStatePatch :: HasNodeStory env err m
283 => ListId
284 -> Versioned NgramsStatePatch'
285 -> m (Versioned NgramsStatePatch')
286 commitStatePatch listId (Versioned p_version p) = do
287 var <- getRepoVar listId
288 vq' <- liftBase $ modifyMVar var $ \ns -> do
289 let
290 a = ns ^. unNodeStory . at listId . _Just
291 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
292 (p', q') = transformWith ngramsStatePatchConflictResolution p q
293 a' = a & a_version +~ 1
294 & a_state %~ act p'
295 & a_history %~ (p' :)
296 pure ( ns & unNodeStory . at listId .~ (Just a')
297 , Versioned (a' ^. a_version) q'
298 )
299 saveRepo
300 -- Save new ngrams
301 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
302
303 pure $ vq'
304
305
306
307 -- This is a special case of tableNgramsPut where the input patch is empty.
308 tableNgramsPull :: HasNodeStory env err m
309 => ListId
310 -> TableNgrams.NgramsType
311 -> Version
312 -> m (Versioned NgramsTablePatch)
313 tableNgramsPull listId ngramsType p_version = do
314 var <- getRepoVar listId
315 r <- liftBase $ readMVar var
316
317 let
318 a = r ^. unNodeStory . at listId . _Just
319 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
320 q_table = q ^. _PatchMap . at ngramsType . _Just
321
322 pure (Versioned (a ^. a_version) q_table)
323
324
325
326
327 -- Apply the given patch to the DB and returns the patch to be applied on the
328 -- client.
329 -- TODO-ACCESS check
330 tableNgramsPut :: ( HasNodeStory env err m
331 , HasInvalidError err
332 , HasSettings env
333 )
334 => TabType
335 -> ListId
336 -> Versioned NgramsTablePatch
337 -> m (Versioned NgramsTablePatch)
338 tableNgramsPut tabType listId (Versioned p_version p_table)
339 | p_table == mempty = do
340 let ngramsType = ngramsTypeFromTabType tabType
341 tableNgramsPull listId ngramsType p_version
342
343 | otherwise = do
344 let ngramsType = ngramsTypeFromTabType tabType
345 (p, p_validity) = PM.singleton ngramsType p_table
346
347 assertValid p_validity
348
349 ret <- commitStatePatch listId (Versioned p_version p)
350 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
351
352 pure ret
353
354
355
356 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
357 , FlowCmdM env err m
358 , HasNodeError err
359 , HasSettings env
360 )
361 => UpdateTableNgramsCharts
362 -> (JobLog -> m ())
363 -> m JobLog
364 tableNgramsPostChartsAsync utn logStatus = do
365 let tabType = utn ^. utn_tab_type
366 let listId = utn ^. utn_list_id
367
368 node <- getNode listId
369 let nId = node ^. node_id
370 _uId = node ^. node_user_id
371 mCId = node ^. node_parent_id
372
373 printDebug "[tableNgramsPut] tabType" tabType
374 printDebug "[tableNgramsPut] listId" listId
375
376 case mCId of
377 Nothing -> do
378 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
379 pure $ jobLogFail $ jobLogInit 1
380 Just cId -> do
381 case tabType of
382 Authors -> do
383 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
384 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
385 logRef
386 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
387 logRefSuccess
388
389 getRef
390 Institutes -> do
391 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
392 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
393 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
394 logRef
395 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
396 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
397 logRefSuccess
398 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
399 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
400 logRefSuccess
401 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
402 logRefSuccess
403
404 getRef
405 Sources -> do
406 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
407 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
408 logRef
409 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
410 logRefSuccess
411
412 getRef
413 Terms -> do
414 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
415 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
416 logRef
417 {-
418 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
419 logRefSuccess
420 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
421 logRefSuccess
422 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
423 logRefSuccess
424 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
425 logRefSuccess
426 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
427 logRefSuccess
428 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
429 -}
430 logRefSuccess
431
432 getRef
433 _ -> do
434 printDebug "[tableNgramsPut] no update for tabType = " tabType
435 pure $ jobLogFail $ jobLogInit 1
436
437 {-
438 { _ne_list :: ListType
439 If we merge the parents/children we can potentially create cycles!
440 , _ne_parent :: Maybe NgramsTerm
441 , _ne_children :: MSet NgramsTerm
442 }
443 -}
444
445 getNgramsTableMap :: HasNodeStory env err m
446 => NodeId
447 -> TableNgrams.NgramsType
448 -> m (Versioned NgramsTableMap)
449 getNgramsTableMap nodeId ngramsType = do
450 v <- getRepoVar nodeId
451 repo <- liftBase $ readMVar v
452 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
453 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
454
455
456 dumpJsonTableMap :: HasNodeStory env err m
457 => Text
458 -> NodeId
459 -> TableNgrams.NgramsType
460 -> m ()
461 dumpJsonTableMap fpath nodeId ngramsType = do
462 m <- getNgramsTableMap nodeId ngramsType
463 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
464 pure ()
465
466
467 type MinSize = Int
468 type MaxSize = Int
469
470 -- | TODO Errors management
471 -- TODO: polymorphic for Annuaire or Corpus or ...
472 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
473 -- TODO: should take only one ListId
474
475
476 getTableNgrams :: forall env err m.
477 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
478 => NodeType -> NodeId -> TabType
479 -> ListId -> Limit -> Maybe Offset
480 -> Maybe ListType
481 -> Maybe MinSize -> Maybe MaxSize
482 -> Maybe OrderBy
483 -> (NgramsTerm -> Bool)
484 -> m (VersionedWithCount NgramsTable)
485 getTableNgrams _nType nId tabType listId limit_ offset
486 listType minSize maxSize orderBy searchQuery = do
487
488 t0 <- getTime
489 -- lIds <- selectNodesWithUsername NodeList userMaster
490 let
491 ngramsType = ngramsTypeFromTabType tabType
492 offset' = maybe 0 identity offset
493 listType' = maybe (const True) (==) listType
494 minSize' = maybe (const True) (<=) minSize
495 maxSize' = maybe (const True) (>=) maxSize
496
497 selected_node n = minSize' s
498 && maxSize' s
499 && searchQuery (n ^. ne_ngrams)
500 && listType' (n ^. ne_list)
501 where
502 s = n ^. ne_size
503
504 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
505
506 ---------------------------------------
507 sortOnOrder Nothing = identity
508 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
509 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
510 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
511 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
512
513 ---------------------------------------
514
515 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
516 filteredNodes tableMap = rootOf <$> list & filter selected_node
517 where
518 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
519 (tableMap ^. at r)
520 )
521 (ne ^. ne_root)
522 list = tableMap ^.. each
523
524 ---------------------------------------
525 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
526 selectAndPaginate tableMap = roots <> inners
527 where
528 list = tableMap ^.. each
529 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
530 (tableMap ^. at r)
531 )
532 (ne ^. ne_root)
533 selected_nodes = list & take limit_
534 . drop offset'
535 . filter selected_node
536 . sortOnOrder orderBy
537 roots = rootOf <$> selected_nodes
538 rootsSet = Set.fromList (_ne_ngrams <$> roots)
539 inners = list & filter (selected_inner rootsSet)
540
541 ---------------------------------------
542 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
543 setScores False table = pure table
544 setScores True table = do
545 let ngrams_terms = table ^.. each . ne_ngrams
546 t1 <- getTime
547 occurrences <- getOccByNgramsOnlyFast' nId
548 listId
549 ngramsType
550 ngrams_terms
551 t2 <- getTime
552 liftBase $ hprint stderr
553 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
554 (length ngrams_terms) t1 t2
555 {-
556 occurrences <- getOccByNgramsOnlySlow nType nId
557 (lIds <> [listId])
558 ngramsType
559 ngrams_terms
560 -}
561 let
562 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
563
564 pure $ table & each %~ setOcc
565 ---------------------------------------
566
567 -- lists <- catMaybes <$> listsWith userMaster
568 -- trace (show lists) $
569 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
570
571
572 let scoresNeeded = needsScores orderBy
573 tableMap1 <- getNgramsTableMap listId ngramsType
574 t1 <- getTime
575 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
576 . Map.mapWithKey ngramsElementFromRepo
577
578 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
579 . filteredNodes
580 let fltrCount = length $ fltr ^. v_data . _NgramsTable
581
582 t2 <- getTime
583 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
584 . setScores (not scoresNeeded)
585 . selectAndPaginate
586 t3 <- getTime
587 liftBase $ hprint stderr
588 ("getTableNgrams total=" % hasTime
589 % " map1=" % hasTime
590 % " map2=" % hasTime
591 % " map3=" % hasTime
592 % " sql=" % (if scoresNeeded then "map2" else "map3")
593 % "\n"
594 ) t0 t3 t0 t1 t1 t2 t2 t3
595 pure $ toVersionedWithCount fltrCount tableMap3
596
597
598
599 scoresRecomputeTableNgrams :: forall env err m.
600 (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
601 => NodeId -> TabType -> ListId -> m Int
602 scoresRecomputeTableNgrams nId tabType listId = do
603 tableMap <- getNgramsTableMap listId ngramsType
604 _ <- tableMap & v_data %%~ setScores
605 . Map.mapWithKey ngramsElementFromRepo
606
607 pure $ 1
608 where
609 ngramsType = ngramsTypeFromTabType tabType
610
611 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
612 setScores table = do
613 let ngrams_terms = table ^.. each . ne_ngrams
614 occurrences <- getOccByNgramsOnlyFast' nId
615 listId
616 ngramsType
617 ngrams_terms
618 let
619 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
620
621 pure $ table & each %~ setOcc
622
623
624
625
626 -- APIs
627
628 -- TODO: find a better place for the code above, All APIs stay here
629
630 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
631 deriving (Generic, Enum, Bounded, Read, Show)
632
633 instance FromHttpApiData OrderBy
634 where
635 parseUrlPiece "TermAsc" = pure TermAsc
636 parseUrlPiece "TermDesc" = pure TermDesc
637 parseUrlPiece "ScoreAsc" = pure ScoreAsc
638 parseUrlPiece "ScoreDesc" = pure ScoreDesc
639 parseUrlPiece _ = Left "Unexpected value of OrderBy"
640
641
642 instance ToParamSchema OrderBy
643 instance FromJSON OrderBy
644 instance ToJSON OrderBy
645 instance ToSchema OrderBy
646 instance Arbitrary OrderBy
647 where
648 arbitrary = elements [minBound..maxBound]
649
650 needsScores :: Maybe OrderBy -> Bool
651 needsScores (Just ScoreAsc) = True
652 needsScores (Just ScoreDesc) = True
653 needsScores _ = False
654
655 type TableNgramsApiGet = Summary " Table Ngrams API Get"
656 :> QueryParamR "ngramsType" TabType
657 :> QueryParamR "list" ListId
658 :> QueryParamR "limit" Limit
659 :> QueryParam "offset" Offset
660 :> QueryParam "listType" ListType
661 :> QueryParam "minTermSize" MinSize
662 :> QueryParam "maxTermSize" MaxSize
663 :> QueryParam "orderBy" OrderBy
664 :> QueryParam "search" Text
665 :> Get '[JSON] (VersionedWithCount NgramsTable)
666
667 type TableNgramsApiPut = Summary " Table Ngrams API Change"
668 :> QueryParamR "ngramsType" TabType
669 :> QueryParamR "list" ListId
670 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
671 :> Put '[JSON] (Versioned NgramsTablePatch)
672
673 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
674 :> QueryParamR "ngramsType" TabType
675 :> QueryParamR "list" ListId
676 :> "recompute" :> Post '[JSON] Int
677
678 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
679 :> QueryParamR "ngramsType" TabType
680 :> QueryParamR "list" ListId
681 :> Get '[JSON] Version
682
683 type TableNgramsApi = TableNgramsApiGet
684 :<|> TableNgramsApiPut
685 :<|> RecomputeScoresNgramsApiGet
686 :<|> "version" :> TableNgramsApiGetVersion
687 :<|> TableNgramsAsyncApi
688
689 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
690 :> "async"
691 :> "charts"
692 :> "update"
693 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
694
695 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
696 => NodeId
697 -> TabType
698 -> ListId
699 -> Limit
700 -> Maybe Offset
701 -> Maybe ListType
702 -> Maybe MinSize -> Maybe MaxSize
703 -> Maybe OrderBy
704 -> Maybe Text -- full text search
705 -> m (VersionedWithCount NgramsTable)
706 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
707 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
708 where
709 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
710
711
712
713 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
714 => NodeId
715 -> TabType
716 -> ListId
717 -> m Version
718 getTableNgramsVersion _nId _tabType listId = currentVersion listId
719
720
721
722 -- TODO: limit?
723 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
724 -- This line above looks like a waste of computation to finally get only the version.
725 -- See the comment about listNgramsChangedSince.
726
727
728 -- | Text search is deactivated for now for ngrams by doc only
729 getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
730 => DocId -> TabType
731 -> ListId -> Limit -> Maybe Offset
732 -> Maybe ListType
733 -> Maybe MinSize -> Maybe MaxSize
734 -> Maybe OrderBy
735 -> Maybe Text -- full text search
736 -> m (VersionedWithCount NgramsTable)
737 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
738 ns <- selectNodesWithUsername NodeList userMaster
739 let ngramsType = ngramsTypeFromTabType tabType
740 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
741 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
742 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
743
744
745
746 apiNgramsTableCorpus :: ( GargServerC env err m
747 )
748 => NodeId -> ServerT TableNgramsApi m
749 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
750 :<|> tableNgramsPut
751 :<|> scoresRecomputeTableNgrams cId
752 :<|> getTableNgramsVersion cId
753 :<|> apiNgramsAsync cId
754
755 apiNgramsTableDoc :: ( GargServerC env err m
756 )
757 => DocId -> ServerT TableNgramsApi m
758 apiNgramsTableDoc dId = getTableNgramsDoc dId
759 :<|> tableNgramsPut
760 :<|> scoresRecomputeTableNgrams dId
761 :<|> getTableNgramsVersion dId
762 :<|> apiNgramsAsync dId
763 -- > index all the corpus accordingly (TODO AD)
764
765 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
766 apiNgramsAsync _dId =
767 serveJobsAPI $
768 JobFunction $ \i log ->
769 let
770 log' x = do
771 printDebug "tableNgramsPostChartsAsync" x
772 liftBase $ log x
773 in tableNgramsPostChartsAsync i log'
774
775 -- Did the given list of ngrams changed since the given version?
776 -- The returned value is versioned boolean value, meaning that one always retrieve the
777 -- latest version.
778 -- If the given version is negative then one simply receive the latest version and True.
779 -- Using this function is more precise than simply comparing the latest version number
780 -- with the local version number. Indeed there might be no change to this particular list
781 -- and still the version number has changed because of other lists.
782 --
783 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
784 -- * currentVersion: good computation, good bandwidth, bad precision.
785 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
786 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
787 listNgramsChangedSince :: HasNodeStory env err m
788 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
789 listNgramsChangedSince listId ngramsType version
790 | version < 0 =
791 Versioned <$> currentVersion listId <*> pure True
792 | otherwise =
793 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
794
795