]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FIX] cluster on connected components only and leaves others nodes alone (todo)....
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 module Gargantext.API.Ngrams
26 ( TableNgramsApi
27 , TableNgramsApiGet
28 , TableNgramsApiPut
29
30 , getTableNgrams
31 , setListNgrams
32 --, rmListNgrams TODO fix before exporting
33 , apiNgramsTableCorpus
34 , apiNgramsTableDoc
35
36 , NgramsStatePatch
37 , NgramsTablePatch
38 , NgramsTableMap
39
40 , NgramsTerm(..)
41
42 , NgramsElement(..)
43 , mkNgramsElement
44
45 , RootParent(..)
46
47 , MSet
48 , mSetFromList
49 , mSetToList
50
51 , Repo(..)
52 , r_version
53 , r_state
54 , r_history
55 , NgramsRepo
56 , NgramsRepoElement(..)
57 , saveRepo
58 , initRepo
59
60 , RepoEnv(..)
61 , renv_var
62 , renv_lock
63
64 , TabType(..)
65
66 , HasRepoVar(..)
67 , HasRepoSaver(..)
68 , HasRepo(..)
69 , RepoCmdM
70 , QueryParamR
71 , TODO
72
73 -- Internals
74 , getNgramsTableMap
75 , dumpJsonTableMap
76 , tableNgramsPull
77 , tableNgramsPut
78
79 , Version
80 , Versioned(..)
81 , VersionedWithCount(..)
82 , currentVersion
83 , listNgramsChangedSince
84 )
85 where
86
87 import Control.Concurrent
88 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
89 import Control.Monad.Reader
90 import Data.Aeson hiding ((.=))
91 import Data.Either (Either(..))
92 import Data.Foldable
93 import Data.Map.Strict (Map)
94 import Data.Maybe (fromMaybe)
95 import Data.Monoid
96 import Data.Ord (Down(..))
97 import Data.Patch.Class (Action(act), Transformable(..), ours)
98 import Data.Swagger hiding (version, patch)
99 import Data.Text (Text, isInfixOf, unpack)
100 import Data.Text.Lazy.IO as DTL
101 import Formatting (hprint, int, (%))
102 import GHC.Generics (Generic)
103 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
104 import Gargantext.API.Admin.Types (HasSettings)
105 import Gargantext.API.Ngrams.Types
106 import Gargantext.API.Prelude
107 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
108 import Gargantext.Core.Utils (something)
109 import Gargantext.Database.Action.Flow.Types
110 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
111 import Gargantext.Database.Admin.Config (userMaster)
112 import Gargantext.Database.Admin.Types.Node (NodeType(..))
113 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
114 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
115 import Gargantext.Database.Query.Table.Node (getNode)
116 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
117 import Gargantext.Database.Query.Table.Node.Select
118 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
119 import Gargantext.Prelude hiding (log)
120 import Gargantext.Prelude.Job
121 import Gargantext.Prelude.Clock (hasTime, getTime)
122 import Prelude (error)
123 import Servant hiding (Patch)
124 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
125 import System.IO (stderr)
126 import Test.QuickCheck (elements)
127 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
128 import qualified Data.Aeson.Text as DAT
129 import qualified Data.List as List
130 import qualified Data.Map.Strict as Map
131 import qualified Data.Map.Strict.Patch as PM
132 import qualified Data.Set as S
133 import qualified Data.Set as Set
134 import qualified Gargantext.API.Metrics as Metrics
135 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
136
137 {-
138 -- TODO sequences of modifications (Patchs)
139 type NgramsIdPatch = Patch NgramsId NgramsPatch
140
141 ngramsPatch :: Int -> NgramsPatch
142 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
143
144 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
145 toEdit n p = Edit n p
146 ngramsIdPatch :: Patch NgramsId NgramsPatch
147 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
148 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
149 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
150 ]
151
152 -- applyPatchBack :: Patch -> IO Patch
153 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
154 -}
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157 ------------------------------------------------------------------------
158
159 {-
160 -- TODO: Replace.old is ignored which means that if the current list
161 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
162 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
163 -- However this should not happen in non conflicting situations.
164 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
165 mkListsUpdate nt patches =
166 [ (ngramsTypeId nt, ng, listTypeId lt)
167 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
168 , lt <- patch ^.. patch_list . new
169 ]
170
171 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
172 -> NgramsType
173 -> NgramsTablePatch
174 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
175 mkChildrenGroups addOrRem nt patches =
176 [ (ngramsTypeId nt, parent, child)
177 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
178 , child <- patch ^.. patch_children . to addOrRem . folded
179 ]
180 -}
181
182 ------------------------------------------------------------------------
183
184 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
185 => m ()
186 saveRepo = liftBase =<< view repoSaver
187
188 listTypeConflictResolution :: ListType -> ListType -> ListType
189 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
190
191 ngramsStatePatchConflictResolution
192 :: TableNgrams.NgramsType
193 -> NodeId
194 -> NgramsTerm
195 -> ConflictResolutionNgramsPatch
196 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
197 = (ours, (const ours, ours), (False, False))
198 -- (False, False) mean here that Mod has always priority.
199 -- (True, False) <- would mean priority to the left (same as ours).
200
201 -- undefined {- TODO think this through -}, listTypeConflictResolution)
202
203 -- Current state:
204 -- Insertions are not considered as patches,
205 -- they do not extend history,
206 -- they do not bump version.
207 insertNewOnly :: a -> Maybe b -> a
208 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
209 -- TODO error handling
210
211 {- unused
212 -- TODO refactor with putListNgrams
213 copyListNgrams :: RepoCmdM env err m
214 => NodeId -> NodeId -> NgramsType
215 -> m ()
216 copyListNgrams srcListId dstListId ngramsType = do
217 var <- view repoVar
218 liftBase $ modifyMVar_ var $
219 pure . (r_state . at ngramsType %~ (Just . f . something))
220 saveRepo
221 where
222 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
223 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
224
225 -- TODO refactor with putListNgrams
226 -- The list must be non-empty!
227 -- The added ngrams must be non-existent!
228 addListNgrams :: RepoCmdM env err m
229 => NodeId -> NgramsType
230 -> [NgramsElement] -> m ()
231 addListNgrams listId ngramsType nes = do
232 var <- view repoVar
233 liftBase $ modifyMVar_ var $
234 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
235 saveRepo
236 where
237 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
238 -}
239
240 -- UNSAFE
241 rmListNgrams :: RepoCmdM env err m
242 => ListId
243 -> TableNgrams.NgramsType
244 -> m ()
245 rmListNgrams l nt = setListNgrams l nt mempty
246
247 -- | TODO: incr the Version number
248 -- && should use patch
249 -- UNSAFE
250 setListNgrams :: RepoCmdM env err m
251 => NodeId
252 -> TableNgrams.NgramsType
253 -> Map NgramsTerm NgramsRepoElement
254 -> m ()
255 setListNgrams listId ngramsType ns = do
256 var <- view repoVar
257 liftBase $ modifyMVar_ var $
258 pure . ( r_state
259 . at ngramsType %~
260 (Just .
261 (at listId .~ ( Just ns))
262 . something
263 )
264 )
265 saveRepo
266
267
268 currentVersion :: RepoCmdM env err m
269 => m Version
270 currentVersion = do
271 var <- view repoVar
272 r <- liftBase $ readMVar var
273 pure $ r ^. r_version
274
275 newNgramsFromNgramsStatePatch :: NgramsStatePatch -> [Ngrams]
276 newNgramsFromNgramsStatePatch p =
277 [ text2ngrams (unNgramsTerm n)
278 | (n,np) <- p ^.. _PatchMap . each . _PatchMap . each . _NgramsTablePatch . _PatchMap . ifolded . withIndex
279 , _ <- np ^.. patch_new . _Just
280 ]
281
282 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
283 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
284 commitStatePatch (Versioned p_version p) = do
285 var <- view repoVar
286 vq' <- liftBase $ modifyMVar var $ \r -> do
287 let
288 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
289 (p', q') = transformWith ngramsStatePatchConflictResolution p q
290 r' = r & r_version +~ 1
291 & r_state %~ act p'
292 & r_history %~ (p' :)
293 {-
294 -- Ideally we would like to check these properties. However:
295 -- * They should be checked only to debug the code. The client data
296 -- should be able to trigger these.
297 -- * What kind of error should they throw (we are in IO here)?
298 -- * Should we keep modifyMVar?
299 -- * Should we throw the validation in an Exception, catch it around
300 -- modifyMVar and throw it back as an Error?
301 assertValid $ transformable p q
302 assertValid $ applicable p' (r ^. r_state)
303 -}
304 pure (r', Versioned (r' ^. r_version) q')
305
306 saveRepo
307
308 -- Save new ngrams
309 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
310
311 pure vq'
312
313 -- This is a special case of tableNgramsPut where the input patch is empty.
314 tableNgramsPull :: RepoCmdM env err m
315 => ListId
316 -> TableNgrams.NgramsType
317 -> Version
318 -> m (Versioned NgramsTablePatch)
319 tableNgramsPull listId ngramsType p_version = do
320 var <- view repoVar
321 r <- liftBase $ readMVar var
322
323 let
324 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
325 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
326
327 pure (Versioned (r ^. r_version) q_table)
328
329 -- Apply the given patch to the DB and returns the patch to be applied on the
330 -- client.
331 -- TODO-ACCESS check
332 tableNgramsPut :: ( FlowCmdM env err m
333 , HasSettings env
334 )
335 => TabType
336 -> ListId
337 -> Versioned NgramsTablePatch
338 -> m (Versioned NgramsTablePatch)
339 tableNgramsPut tabType listId (Versioned p_version p_table)
340 | p_table == mempty = do
341 let ngramsType = ngramsTypeFromTabType tabType
342 tableNgramsPull listId ngramsType p_version
343
344 | otherwise = do
345 let ngramsType = ngramsTypeFromTabType tabType
346 (p0, p0_validity) = PM.singleton listId p_table
347 (p, p_validity) = PM.singleton ngramsType p0
348
349 assertValid p0_validity
350 assertValid p_validity
351
352 ret <- commitStatePatch (Versioned p_version p)
353 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
354
355 pure ret
356
357
358 tableNgramsPostChartsAsync :: ( FlowCmdM env err m
359 , HasNodeError err
360 , HasSettings env
361 )
362 => UpdateTableNgramsCharts
363 -> (JobLog -> m ())
364 -> m JobLog
365 tableNgramsPostChartsAsync utn logStatus = do
366 let tabType = utn ^. utn_tab_type
367 let listId = utn ^. utn_list_id
368
369 node <- getNode listId
370 let nId = node ^. node_id
371 _uId = node ^. node_user_id
372 mCId = node ^. node_parent_id
373
374 printDebug "[tableNgramsPut] tabType" tabType
375 printDebug "[tableNgramsPut] listId" listId
376
377 case mCId of
378 Nothing -> do
379 printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
380 pure $ jobLogFail $ jobLogInit 1
381 Just cId -> do
382 case tabType of
383 Authors -> do
384 -- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
385 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
386 logRef
387 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
388 logRefSuccess
389
390 getRef
391 Institutes -> do
392 -- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
393 -- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
394 (logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
395 logRef
396 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
397 -- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
398 logRefSuccess
399 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
400 -- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
401 logRefSuccess
402 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
403 logRefSuccess
404
405 getRef
406 Sources -> do
407 -- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
408 (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
409 logRef
410 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
411 logRefSuccess
412
413 getRef
414 Terms -> do
415 -- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
416 (logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
417 logRef
418 {-
419 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
420 logRefSuccess
421 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
422 logRefSuccess
423 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
424 logRefSuccess
425 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
426 logRefSuccess
427 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
428 logRefSuccess
429 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
430 -}
431 logRefSuccess
432
433 getRef
434 _ -> do
435 printDebug "[tableNgramsPut] no update for tabType = " tabType
436 pure $ jobLogFail $ jobLogInit 1
437
438 {-
439 { _ne_list :: ListType
440 If we merge the parents/children we can potentially create cycles!
441 , _ne_parent :: Maybe NgramsTerm
442 , _ne_children :: MSet NgramsTerm
443 }
444 -}
445
446 getNgramsTableMap :: RepoCmdM env err m
447 => NodeId
448 -> TableNgrams.NgramsType
449 -> m (Versioned NgramsTableMap)
450 getNgramsTableMap nodeId ngramsType = do
451 v <- view repoVar
452 repo <- liftBase $ readMVar v
453 pure $ Versioned (repo ^. r_version)
454 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
455
456 dumpJsonTableMap :: RepoCmdM env err m
457 => Text
458 -> NodeId
459 -> TableNgrams.NgramsType
460 -> m ()
461 dumpJsonTableMap fpath nodeId ngramsType = do
462 m <- getNgramsTableMap nodeId ngramsType
463 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
464 pure ()
465
466 type MinSize = Int
467 type MaxSize = Int
468
469 -- | TODO Errors management
470 -- TODO: polymorphic for Annuaire or Corpus or ...
471 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
472 -- TODO: should take only one ListId
473
474
475 getTableNgrams :: forall env err m.
476 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
477 => NodeType -> NodeId -> TabType
478 -> ListId -> Limit -> Maybe Offset
479 -> Maybe ListType
480 -> Maybe MinSize -> Maybe MaxSize
481 -> Maybe OrderBy
482 -> (NgramsTerm -> Bool)
483 -> m (VersionedWithCount NgramsTable)
484 getTableNgrams _nType nId tabType listId limit_ offset
485 listType minSize maxSize orderBy searchQuery = do
486
487 t0 <- getTime
488 -- lIds <- selectNodesWithUsername NodeList userMaster
489 let
490 ngramsType = ngramsTypeFromTabType tabType
491 offset' = maybe 0 identity offset
492 listType' = maybe (const True) (==) listType
493 minSize' = maybe (const True) (<=) minSize
494 maxSize' = maybe (const True) (>=) maxSize
495
496 selected_node n = minSize' s
497 && maxSize' s
498 && searchQuery (n ^. ne_ngrams)
499 && listType' (n ^. ne_list)
500 where
501 s = n ^. ne_size
502
503 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
504
505 ---------------------------------------
506 sortOnOrder Nothing = identity
507 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
508 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
509 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
510 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
511
512 ---------------------------------------
513
514 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
515 filteredNodes tableMap = rootOf <$> list & filter selected_node
516 where
517 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
518 (tableMap ^. at r)
519 )
520 (ne ^. ne_root)
521 list = tableMap ^.. each
522
523 ---------------------------------------
524 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
525 selectAndPaginate tableMap = roots <> inners
526 where
527 list = tableMap ^.. each
528 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
529 (tableMap ^. at r)
530 )
531 (ne ^. ne_root)
532 selected_nodes = list & take limit_
533 . drop offset'
534 . filter selected_node
535 . sortOnOrder orderBy
536 roots = rootOf <$> selected_nodes
537 rootsSet = Set.fromList (_ne_ngrams <$> roots)
538 inners = list & filter (selected_inner rootsSet)
539
540 ---------------------------------------
541 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
542 setScores False table = pure table
543 setScores True table = do
544 let ngrams_terms = table ^.. each . ne_ngrams
545 t1 <- getTime
546 occurrences <- getOccByNgramsOnlyFast' nId
547 listId
548 ngramsType
549 ngrams_terms
550 t2 <- getTime
551 liftBase $ hprint stderr
552 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
553 (length ngrams_terms) t1 t2
554 {-
555 occurrences <- getOccByNgramsOnlySlow nType nId
556 (lIds <> [listId])
557 ngramsType
558 ngrams_terms
559 -}
560 let
561 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
562
563 pure $ table & each %~ setOcc
564 ---------------------------------------
565
566 -- lists <- catMaybes <$> listsWith userMaster
567 -- trace (show lists) $
568 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
569
570
571 let scoresNeeded = needsScores orderBy
572 tableMap1 <- getNgramsTableMap listId ngramsType
573 t1 <- getTime
574 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
575 . Map.mapWithKey ngramsElementFromRepo
576
577 fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
578 . filteredNodes
579 let fltrCount = length $ fltr ^. v_data . _NgramsTable
580
581 t2 <- getTime
582 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
583 . setScores (not scoresNeeded)
584 . selectAndPaginate
585 t3 <- getTime
586 liftBase $ hprint stderr
587 ("getTableNgrams total=" % hasTime
588 % " map1=" % hasTime
589 % " map2=" % hasTime
590 % " map3=" % hasTime
591 % " sql=" % (if scoresNeeded then "map2" else "map3")
592 % "\n"
593 ) t0 t3 t0 t1 t1 t2 t2 t3
594 pure $ toVersionedWithCount fltrCount tableMap3
595
596
597 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
598 scoresRecomputeTableNgrams nId tabType listId = do
599 tableMap <- getNgramsTableMap listId ngramsType
600 _ <- tableMap & v_data %%~ setScores
601 . Map.mapWithKey ngramsElementFromRepo
602
603 pure $ 1
604 where
605 ngramsType = ngramsTypeFromTabType tabType
606
607 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
608 setScores table = do
609 let ngrams_terms = table ^.. each . ne_ngrams
610 occurrences <- getOccByNgramsOnlyFast' nId
611 listId
612 ngramsType
613 ngrams_terms
614 let
615 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
616
617 pure $ table & each %~ setOcc
618
619
620
621 -- APIs
622
623 -- TODO: find a better place for the code above, All APIs stay here
624
625 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
626 deriving (Generic, Enum, Bounded, Read, Show)
627
628 instance FromHttpApiData OrderBy
629 where
630 parseUrlPiece "TermAsc" = pure TermAsc
631 parseUrlPiece "TermDesc" = pure TermDesc
632 parseUrlPiece "ScoreAsc" = pure ScoreAsc
633 parseUrlPiece "ScoreDesc" = pure ScoreDesc
634 parseUrlPiece _ = Left "Unexpected value of OrderBy"
635
636
637 instance ToParamSchema OrderBy
638 instance FromJSON OrderBy
639 instance ToJSON OrderBy
640 instance ToSchema OrderBy
641 instance Arbitrary OrderBy
642 where
643 arbitrary = elements [minBound..maxBound]
644
645 needsScores :: Maybe OrderBy -> Bool
646 needsScores (Just ScoreAsc) = True
647 needsScores (Just ScoreDesc) = True
648 needsScores _ = False
649
650 type TableNgramsApiGet = Summary " Table Ngrams API Get"
651 :> QueryParamR "ngramsType" TabType
652 :> QueryParamR "list" ListId
653 :> QueryParamR "limit" Limit
654 :> QueryParam "offset" Offset
655 :> QueryParam "listType" ListType
656 :> QueryParam "minTermSize" MinSize
657 :> QueryParam "maxTermSize" MaxSize
658 :> QueryParam "orderBy" OrderBy
659 :> QueryParam "search" Text
660 :> Get '[JSON] (VersionedWithCount NgramsTable)
661
662 type TableNgramsApiPut = Summary " Table Ngrams API Change"
663 :> QueryParamR "ngramsType" TabType
664 :> QueryParamR "list" ListId
665 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
666 :> Put '[JSON] (Versioned NgramsTablePatch)
667
668 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
669 :> QueryParamR "ngramsType" TabType
670 :> QueryParamR "list" ListId
671 :> "recompute" :> Post '[JSON] Int
672
673 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
674 :> QueryParamR "ngramsType" TabType
675 :> QueryParamR "list" ListId
676 :> Get '[JSON] Version
677
678 type TableNgramsApi = TableNgramsApiGet
679 :<|> TableNgramsApiPut
680 :<|> RecomputeScoresNgramsApiGet
681 :<|> "version" :> TableNgramsApiGetVersion
682 :<|> TableNgramsAsyncApi
683
684 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
685 :> "async"
686 :> "charts"
687 :> "update"
688 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
689
690 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
691 => NodeId
692 -> TabType
693 -> ListId
694 -> Limit
695 -> Maybe Offset
696 -> Maybe ListType
697 -> Maybe MinSize -> Maybe MaxSize
698 -> Maybe OrderBy
699 -> Maybe Text -- full text search
700 -> m (VersionedWithCount NgramsTable)
701 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
702 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
703 where
704 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
705
706 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
707 => NodeId
708 -> TabType
709 -> ListId
710 -> m Version
711 getTableNgramsVersion _nId _tabType _listId = currentVersion
712 -- TODO: limit?
713 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
714 -- This line above looks like a waste of computation to finally get only the version.
715 -- See the comment about listNgramsChangedSince.
716
717
718 -- | Text search is deactivated for now for ngrams by doc only
719 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
720 => DocId -> TabType
721 -> ListId -> Limit -> Maybe Offset
722 -> Maybe ListType
723 -> Maybe MinSize -> Maybe MaxSize
724 -> Maybe OrderBy
725 -> Maybe Text -- full text search
726 -> m (VersionedWithCount NgramsTable)
727 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
728 ns <- selectNodesWithUsername NodeList userMaster
729 let ngramsType = ngramsTypeFromTabType tabType
730 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
731 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
732 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
733
734
735
736 apiNgramsTableCorpus :: ( GargServerC env err m
737 )
738 => NodeId -> ServerT TableNgramsApi m
739 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
740 :<|> tableNgramsPut
741 :<|> scoresRecomputeTableNgrams cId
742 :<|> getTableNgramsVersion cId
743 :<|> apiNgramsAsync cId
744
745 apiNgramsTableDoc :: ( GargServerC env err m
746 )
747 => DocId -> ServerT TableNgramsApi m
748 apiNgramsTableDoc dId = getTableNgramsDoc dId
749 :<|> tableNgramsPut
750 :<|> scoresRecomputeTableNgrams dId
751 :<|> getTableNgramsVersion dId
752 :<|> apiNgramsAsync dId
753 -- > index all the corpus accordingly (TODO AD)
754
755 apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
756 apiNgramsAsync _dId =
757 serveJobsAPI $
758 JobFunction $ \i log ->
759 let
760 log' x = do
761 printDebug "tableNgramsPostChartsAsync" x
762 liftBase $ log x
763 in tableNgramsPostChartsAsync i log'
764
765 -- Did the given list of ngrams changed since the given version?
766 -- The returned value is versioned boolean value, meaning that one always retrieve the
767 -- latest version.
768 -- If the given version is negative then one simply receive the latest version and True.
769 -- Using this function is more precise than simply comparing the latest version number
770 -- with the local version number. Indeed there might be no change to this particular list
771 -- and still the version number has changed because of other lists.
772 --
773 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
774 -- * currentVersion: good computation, good bandwidth, bad precision.
775 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
776 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
777 listNgramsChangedSince :: RepoCmdM env err m
778 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
779 listNgramsChangedSince listId ngramsType version
780 | version < 0 =
781 Versioned <$> currentVersion <*> pure True
782 | otherwise =
783 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
784
785
786
787