]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[NLP] add support for arbitrary languages in INI file
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-|
2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams API
11
12 -- | TODO
13 get ngrams filtered by NgramsType
14 add get
15
16 -}
17
18 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
19
20 {-# LANGUAGE ConstraintKinds #-}
21 {-# LANGUAGE ScopedTypeVariables #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24
25 {-# LANGUAGE IncoherentInstances #-}
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , getTableNgramsCorpus
33 , setListNgrams
34 --, rmListNgrams TODO fix before exporting
35 , apiNgramsTableCorpus
36 , apiNgramsTableDoc
37
38 , NgramsTablePatch
39 , NgramsTableMap
40
41 , NgramsTerm(..)
42
43 , NgramsElement(..)
44 , mkNgramsElement
45
46 , RootParent(..)
47
48 , MSet
49 , mSetFromList
50 , mSetToList
51
52 , Repo(..)
53 , r_version
54 , r_state
55 , r_history
56 , NgramsRepoElement(..)
57 , saveNodeStory
58 , saveNodeStoryImmediate
59 , initRepo
60
61 , TabType(..)
62
63 , QueryParamR
64 , TODO
65
66 -- Internals
67 , getNgramsTableMap
68 , dumpJsonTableMap
69 , tableNgramsPull
70 , tableNgramsPut
71
72 , getNgramsTable'
73 , setNgramsTableScores
74
75 , Version
76 , Versioned(..)
77 , VersionedWithCount(..)
78 , currentVersion
79 , listNgramsChangedSince
80 , MinSize, MaxSize, OrderBy, NgramsTable
81 , UpdateTableNgramsCharts
82 )
83 where
84
85 import Control.Concurrent
86 import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
87 import Control.Monad.Reader
88 import Data.Aeson hiding ((.=))
89 import Data.Either (Either(..))
90 import Data.Foldable
91 import Data.Map.Strict (Map)
92 import Data.Maybe (fromMaybe)
93 import Data.Monoid
94 import Data.Ord (Down(..))
95 import Data.Patch.Class (Action(act), Transformable(..), ours)
96 import Data.Swagger hiding (version, patch)
97 import Data.Text (Text, isInfixOf, toLower, unpack, pack)
98 import Data.Text.Lazy.IO as DTL
99 import Formatting (hprint, int, (%))
100 import GHC.Generics (Generic)
101 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
102 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
103 import Gargantext.API.Admin.Types (HasSettings)
104 import Gargantext.API.Ngrams.Types
105 import Gargantext.API.Prelude
106 import Gargantext.Core.NodeStory
107 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
108 import Gargantext.API.Ngrams.Tools
109 import Gargantext.Database.Action.Flow.Types
110 import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
111 import Gargantext.Database.Admin.Config (userMaster)
112 import Gargantext.Database.Admin.Types.Node (NodeType(..))
113 import Gargantext.Database.Prelude (CmdCommon)
114 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
115 import Gargantext.Database.Query.Table.Node (getNode)
116 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
117 import Gargantext.Database.Query.Table.Node.Select
118 import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
119 import Gargantext.Prelude hiding (log)
120 import Gargantext.Prelude.Clock (hasTime, getTime)
121 import Prelude (error)
122 import Servant hiding (Patch)
123 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
124 import System.IO (stderr)
125 import Test.QuickCheck (elements)
126 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
127 import qualified Data.Aeson.Text as DAT
128 import qualified Data.List as List
129 import qualified Data.Map.Strict as Map
130 import qualified Data.Map.Strict.Patch as PM
131 import qualified Data.Set as S
132 import qualified Data.Set as Set
133 import qualified Gargantext.API.Metrics as Metrics
134 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
135
136 {-
137 -- TODO sequences of modifications (Patchs)
138 type NgramsIdPatch = Patch NgramsId NgramsPatch
139
140 ngramsPatch :: Int -> NgramsPatch
141 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
142
143 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
144 toEdit n p = Edit n p
145 ngramsIdPatch :: Patch NgramsId NgramsPatch
146 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
147 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
148 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
149 ]
150
151 -- applyPatchBack :: Patch -> IO Patch
152 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
153 -}
154 ------------------------------------------------------------------------
155 ------------------------------------------------------------------------
156 ------------------------------------------------------------------------
157
158 {-
159 -- TODO: Replace.old is ignored which means that if the current list
160 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
161 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
162 -- However this should not happen in non conflicting situations.
163 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
164 mkListsUpdate nt patches =
165 [ (ngramsTypeId nt, ng, listTypeId lt)
166 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
167 , lt <- patch ^.. patch_list . new
168 ]
169
170 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
171 -> NgramsType
172 -> NgramsTablePatch
173 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
174 mkChildrenGroups addOrRem nt patches =
175 [ (ngramsTypeId nt, parent, child)
176 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
177 , child <- patch ^.. patch_children . to addOrRem . folded
178 ]
179 -}
180
181 ------------------------------------------------------------------------
182
183 saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
184 => m ()
185 saveNodeStory = do
186 saver <- view hasNodeStorySaver
187 liftBase $ do
188 --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
189 saver
190 --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
191
192
193 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
194 => m ()
195 saveNodeStoryImmediate = do
196 saver <- view hasNodeStoryImmediateSaver
197 liftBase $ do
198 --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
199 saver
200 --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
201
202 listTypeConflictResolution :: ListType -> ListType -> ListType
203 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
204
205
206 ngramsStatePatchConflictResolution
207 :: TableNgrams.NgramsType
208 -> NgramsTerm
209 -> ConflictResolutionNgramsPatch
210 ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
211 = (ours, (const ours, ours), (False, False))
212 -- (False, False) mean here that Mod has always priority.
213 -- = (ours, (const ours, ours), (True, False))
214 -- (True, False) <- would mean priority to the left (same as ours).
215 -- undefined {- TODO think this through -}, listTypeConflictResolution)
216
217
218
219
220 -- Current state:
221 -- Insertions are not considered as patches,
222 -- they do not extend history,
223 -- they do not bump version.
224 insertNewOnly :: a -> Maybe b -> a
225 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
226 -- TODO error handling
227
228 {- unused
229 -- TODO refactor with putListNgrams
230 copyListNgrams :: RepoCmdM env err m
231 => NodeId -> NodeId -> NgramsType
232 -> m ()
233 copyListNgrams srcListId dstListId ngramsType = do
234 var <- view repoVar
235 liftBase $ modifyMVar_ var $
236 pure . (r_state . at ngramsType %~ (Just . f . something))
237 saveNodeStory
238 where
239 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
240 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
241
242 -- TODO refactor with putListNgrams
243 -- The list must be non-empty!
244 -- The added ngrams must be non-existent!
245 addListNgrams :: RepoCmdM env err m
246 => NodeId -> NgramsType
247 -> [NgramsElement] -> m ()
248 addListNgrams listId ngramsType nes = do
249 var <- view repoVar
250 liftBase $ modifyMVar_ var $
251 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
252 saveNodeStory
253 where
254 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
255 -}
256
257 -- | TODO: incr the Version number
258 -- && should use patch
259 -- UNSAFE
260
261 setListNgrams :: HasNodeStory env err m
262 => NodeId
263 -> TableNgrams.NgramsType
264 -> Map NgramsTerm NgramsRepoElement
265 -> m ()
266 setListNgrams listId ngramsType ns = do
267 -- printDebug "[setListNgrams]" (listId, ngramsType)
268 getter <- view hasNodeStory
269 var <- liftBase $ (getter ^. nse_getter) [listId]
270 liftBase $ modifyMVar_ var $
271 pure . ( unNodeStory
272 . at listId . _Just
273 . a_state
274 . at ngramsType
275 .~ Just ns
276 )
277 saveNodeStory
278
279
280 newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
281 newNgramsFromNgramsStatePatch p =
282 [ text2ngrams (unNgramsTerm n)
283 | (n,np) <- p ^.. _PatchMap
284 -- . each . _PatchMap
285 . each . _NgramsTablePatch
286 . _PatchMap . ifolded . withIndex
287 , _ <- np ^.. patch_new . _Just
288 ]
289
290
291
292
293 commitStatePatch :: ( HasNodeStory env err m
294 , HasNodeStoryImmediateSaver env
295 , HasNodeArchiveStoryImmediateSaver env
296 , CmdCommon env )
297 => ListId
298 -> Versioned NgramsStatePatch'
299 -> m (Versioned NgramsStatePatch')
300 commitStatePatch listId (Versioned _p_version p) = do
301 -- printDebug "[commitStatePatch]" listId
302 var <- getNodeStoryVar [listId]
303 archiveSaver <- view hasNodeArchiveStoryImmediateSaver
304 vq' <- liftBase $ modifyMVar var $ \ns -> do
305 let
306 a = ns ^. unNodeStory . at listId . _Just
307 -- apply patches from version p_version to a ^. a_version
308 -- TODO Check this
309 --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
310 q = mconcat $ a ^. a_history
311
312 --printDebug "[commitStatePatch] transformWith" (p,q)
313 -- let tws s = case s of
314 -- (Mod p) -> "Mod"
315 -- _ -> "Rpl"
316 -- printDebug "[commitStatePatch] transformWith" (tws $ p ^. _NgramsPatch, tws $ q ^. _NgramsPatch)
317
318 let
319 (p', q') = transformWith ngramsStatePatchConflictResolution p q
320 a' = a & a_version +~ 1
321 & a_state %~ act p'
322 & a_history %~ (p' :)
323
324 {-
325 -- Ideally we would like to check these properties. However:
326 -- * They should be checked only to debug the code. The client data
327 -- should be able to trigger these.
328 -- * What kind of error should they throw (we are in IO here)?
329 -- * Should we keep modifyMVar?
330 -- * Should we throw the validation in an Exception, catch it around
331 -- modifyMVar and throw it back as an Error?
332 assertValid $ transformable p q
333 assertValid $ applicable p' (r ^. r_state)
334 -}
335 -- printDebug "[commitStatePatch] a version" (a ^. a_version)
336 -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
337 let newNs = ( ns & unNodeStory . at listId .~ (Just a')
338 , Versioned (a' ^. a_version) q'
339 )
340
341 -- NOTE Now is the only good time to save the archive history. We
342 -- have the handle to the MVar and we need to save its exact
343 -- snapshot. Node Story archive is a linear table, so it's only
344 -- couple of inserts, it shouldn't take long...
345
346 -- If we postponed saving the archive to the debounce action, we
347 -- would have issues like
348 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
349 -- where the `q` computation from above (which uses the archive)
350 -- would cause incorrect patch application (before the previous
351 -- archive was saved and applied)
352 newNs' <- archiveSaver $ fst newNs
353
354 pure (newNs', snd newNs)
355
356 -- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce)
357 saveNodeStory
358 --saveNodeStoryImmediate
359 -- Save new ngrams
360 _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
361
362 pure vq'
363
364
365
366 -- This is a special case of tableNgramsPut where the input patch is empty.
367 tableNgramsPull :: HasNodeStory env err m
368 => ListId
369 -> TableNgrams.NgramsType
370 -> Version
371 -> m (Versioned NgramsTablePatch)
372 tableNgramsPull listId ngramsType p_version = do
373 -- printDebug "[tableNgramsPull]" (listId, ngramsType)
374 var <- getNodeStoryVar [listId]
375 r <- liftBase $ readMVar var
376
377 let
378 a = r ^. unNodeStory . at listId . _Just
379 q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
380 q_table = q ^. _PatchMap . at ngramsType . _Just
381
382 pure (Versioned (a ^. a_version) q_table)
383
384
385
386
387 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
388 -- Apply the given patch to the DB and returns the patch to be applied on the
389 -- client.
390 -- TODO-ACCESS check
391 tableNgramsPut :: ( HasNodeStory env err m
392 , HasNodeStoryImmediateSaver env
393 , HasNodeArchiveStoryImmediateSaver env
394 , HasInvalidError err
395 , CmdCommon env
396 )
397 => TabType
398 -> ListId
399 -> Versioned NgramsTablePatch
400 -> m (Versioned NgramsTablePatch)
401 tableNgramsPut tabType listId (Versioned p_version p_table)
402 | p_table == mempty = do
403 -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
404 let ngramsType = ngramsTypeFromTabType tabType
405 tableNgramsPull listId ngramsType p_version
406
407 | otherwise = do
408 -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
409 let ngramsType = ngramsTypeFromTabType tabType
410 (p, p_validity) = PM.singleton ngramsType p_table
411
412 assertValid p_validity
413
414 ret <- commitStatePatch listId (Versioned p_version p)
415 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
416
417 pure ret
418
419
420
421 tableNgramsPostChartsAsync :: ( HasNodeStory env err m
422 , FlowCmdM env err m
423 , HasNodeError err
424 , HasSettings env
425 , MonadJobStatus m
426 )
427 => UpdateTableNgramsCharts
428 -> JobHandle m
429 -> m ()
430 tableNgramsPostChartsAsync utn jobHandle = do
431 let tabType = utn ^. utn_tab_type
432 let listId = utn ^. utn_list_id
433
434 node <- getNode listId
435 let _nId = node ^. node_id
436 _uId = node ^. node_user_id
437 mCId = node ^. node_parent_id
438
439 -- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
440 -- printDebug "[tableNgramsPostChartsAsync] listId" listId
441
442 case mCId of
443 Nothing -> do
444 -- printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
445 markStarted 1 jobHandle
446 markFailed Nothing jobHandle
447 Just cId -> do
448 case tabType of
449 Authors -> do
450 -- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
451 markStarted 1 jobHandle
452 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
453 markComplete jobHandle
454 Institutes -> do
455 -- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
456 -- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
457 markStarted 3 jobHandle
458 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
459 -- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
460 markProgress 1 jobHandle
461 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
462 -- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
463 markProgress 1 jobHandle
464 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
465 markComplete jobHandle
466 Sources -> do
467 -- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
468 markStarted 1 jobHandle
469 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
470 markComplete jobHandle
471 Terms -> do
472 -- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
473 markStarted 6 jobHandle
474 {-
475 _ <- Metrics.updateChart cId (Just listId) tabType Nothing
476 logRefSuccess
477 _ <- Metrics.updatePie cId (Just listId) tabType Nothing
478 logRefSuccess
479 _ <- Metrics.updateScatter cId (Just listId) tabType Nothing
480 logRefSuccess
481 _ <- Metrics.updateTree cId (Just listId) tabType StopTerm
482 logRefSuccess
483 _ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
484 logRefSuccess
485 _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
486 -}
487 markComplete jobHandle
488 _ -> do
489 -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
490 markStarted 1 jobHandle
491 markFailed Nothing jobHandle
492
493 {-
494 { _ne_list :: ListType
495 If we merge the parents/children we can potentially create cycles!
496 , _ne_parent :: Maybe NgramsTerm
497 , _ne_children :: MSet NgramsTerm
498 }
499 -}
500
501 getNgramsTableMap :: HasNodeStory env err m
502 => NodeId
503 -> TableNgrams.NgramsType
504 -> m (Versioned NgramsTableMap)
505 getNgramsTableMap nodeId ngramsType = do
506 v <- getNodeStoryVar [nodeId]
507 repo <- liftBase $ readMVar v
508 pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
509 (repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
510
511
512 dumpJsonTableMap :: HasNodeStory env err m
513 => Text
514 -> NodeId
515 -> TableNgrams.NgramsType
516 -> m ()
517 dumpJsonTableMap fpath nodeId ngramsType = do
518 m <- getNgramsTableMap nodeId ngramsType
519 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
520 pure ()
521
522
523 type MinSize = Int
524 type MaxSize = Int
525
526 -- | TODO Errors management
527 -- TODO: polymorphic for Annuaire or Corpus or ...
528 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
529 -- TODO: should take only one ListId
530
531
532 getTableNgrams :: forall env err m.
533 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
534 => NodeType -> NodeId -> TabType
535 -> ListId -> Limit -> Maybe Offset
536 -> Maybe ListType
537 -> Maybe MinSize -> Maybe MaxSize
538 -> Maybe OrderBy
539 -> (NgramsTerm -> Bool)
540 -> m (VersionedWithCount NgramsTable)
541 getTableNgrams _nType nId tabType listId limit_ offset
542 listType minSize maxSize orderBy searchQuery = do
543
544 t0 <- getTime
545 -- lIds <- selectNodesWithUsername NodeList userMaster
546 let
547 ngramsType = ngramsTypeFromTabType tabType
548 offset' = maybe 0 identity offset
549 listType' = maybe (const True) (==) listType
550 minSize' = maybe (const True) (<=) minSize
551 maxSize' = maybe (const True) (>=) maxSize
552
553 rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
554 (tableMap ^. at r)
555 )
556 (ne ^. ne_root)
557
558 selected_node n = minSize' s
559 && maxSize' s
560 && searchQuery (n ^. ne_ngrams)
561 && listType' (n ^. ne_list)
562 where
563 s = n ^. ne_size
564
565 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
566
567 ---------------------------------------
568 sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
569 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
570 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
571 sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to List.nub . to length)
572 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to List.nub . to length)
573
574 ---------------------------------------
575 -- | Filter the given `tableMap` with the search criteria.
576 filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
577 filteredNodes tableMap = roots
578 where
579 list = tableMap ^.. each
580 selected_nodes = list & filter selected_node
581 roots = rootOf tableMap <$> selected_nodes
582
583 -- | Appends subitems (selected from `tableMap`) for given `roots`.
584 withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
585 withInners tableMap roots = roots <> inners
586 where
587 list = tableMap ^.. each
588 rootSet = Set.fromList (_ne_ngrams <$> roots)
589 inners = list & filter (selected_inner rootSet)
590
591 -- | Paginate the results
592 sortAndPaginate :: [NgramsElement] -> [NgramsElement]
593 sortAndPaginate = take limit_
594 . drop offset'
595 . sortOnOrder orderBy
596
597 ---------------------------------------
598
599 let scoresNeeded = needsScores orderBy
600 t1 <- getTime
601
602 tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
603
604 let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
605
606 let fltrCount = length $ fltr ^. v_data . _NgramsTable
607
608 t2 <- getTime
609 let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
610 t3 <- getTime
611 --printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
612 liftBase $ do
613 hprint stderr
614 ("getTableNgrams total=" % hasTime
615 % " map1=" % hasTime
616 % " map2=" % hasTime
617 % " map3=" % hasTime
618 % " sql=" % (if scoresNeeded then "map2" else "map3")
619 % "\n"
620 ) t0 t3 t0 t1 t1 t2 t2 t3
621
622 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
623 pure $ toVersionedWithCount fltrCount tableMapSorted
624
625
626 -- | Helper function to get the ngrams table with scores.
627 getNgramsTable' :: forall env err m.
628 ( HasNodeStory env err m
629 , HasNodeError err
630 , CmdCommon env)
631 => NodeId
632 -> ListId
633 -> TableNgrams.NgramsType
634 -> m (Versioned (Map.Map NgramsTerm NgramsElement))
635 getNgramsTable' nId listId ngramsType = do
636 tableMap <- getNgramsTableMap listId ngramsType
637 tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
638 . Map.mapWithKey ngramsElementFromRepo
639
640 -- | Helper function to set scores on an `NgramsTable`.
641 setNgramsTableScores :: forall env err m t.
642 ( Each t t NgramsElement NgramsElement
643 , HasNodeStory env err m
644 , HasNodeError err
645 , CmdCommon env )
646 => NodeId
647 -> ListId
648 -> TableNgrams.NgramsType
649 -> t
650 -> m t
651 setNgramsTableScores nId listId ngramsType table = do
652 t1 <- getTime
653 occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
654 --printDebug "[setNgramsTableScores] occurrences" occurrences
655 t2 <- getTime
656 liftBase $ do
657 let ngrams_terms = table ^.. each . ne_ngrams
658 -- printDebug "ngrams_terms" ngrams_terms
659 hprint stderr
660 ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
661 (length ngrams_terms) t1 t2
662 let
663 setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
664
665 --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
666
667 pure $ table & each %~ setOcc
668
669
670
671
672 scoresRecomputeTableNgrams :: forall env err m.
673 (HasNodeStory env err m, HasNodeError err, CmdCommon env)
674 => NodeId -> TabType -> ListId -> m Int
675 scoresRecomputeTableNgrams nId tabType listId = do
676 tableMap <- getNgramsTableMap listId ngramsType
677 _ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
678 . Map.mapWithKey ngramsElementFromRepo
679
680 pure $ 1
681 where
682 ngramsType = ngramsTypeFromTabType tabType
683
684
685 -- APIs
686
687 -- TODO: find a better place for the code above, All APIs stay here
688
689 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
690 deriving (Generic, Enum, Bounded, Read, Show)
691
692 instance FromHttpApiData OrderBy
693 where
694 parseUrlPiece "TermAsc" = pure TermAsc
695 parseUrlPiece "TermDesc" = pure TermDesc
696 parseUrlPiece "ScoreAsc" = pure ScoreAsc
697 parseUrlPiece "ScoreDesc" = pure ScoreDesc
698 parseUrlPiece _ = Left "Unexpected value of OrderBy"
699
700 instance ToHttpApiData OrderBy where
701 toUrlPiece = pack . show
702
703 instance ToParamSchema OrderBy
704 instance FromJSON OrderBy
705 instance ToJSON OrderBy
706 instance ToSchema OrderBy
707 instance Arbitrary OrderBy
708 where
709 arbitrary = elements [minBound..maxBound]
710
711 needsScores :: Maybe OrderBy -> Bool
712 needsScores (Just ScoreAsc) = True
713 needsScores (Just ScoreDesc) = True
714 needsScores _ = False
715
716 type TableNgramsApiGet = Summary " Table Ngrams API Get"
717 :> QueryParamR "ngramsType" TabType
718 :> QueryParamR "list" ListId
719 :> QueryParamR "limit" Limit
720 :> QueryParam "offset" Offset
721 :> QueryParam "listType" ListType
722 :> QueryParam "minTermSize" MinSize
723 :> QueryParam "maxTermSize" MaxSize
724 :> QueryParam "orderBy" OrderBy
725 :> QueryParam "search" Text
726 :> Get '[JSON] (VersionedWithCount NgramsTable)
727
728 type TableNgramsApiPut = Summary " Table Ngrams API Change"
729 :> QueryParamR "ngramsType" TabType
730 :> QueryParamR "list" ListId
731 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
732 :> Put '[JSON] (Versioned NgramsTablePatch)
733
734 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
735 :> QueryParamR "ngramsType" TabType
736 :> QueryParamR "list" ListId
737 :> "recompute" :> Post '[JSON] Int
738
739 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
740 :> QueryParamR "ngramsType" TabType
741 :> QueryParamR "list" ListId
742 :> Get '[JSON] Version
743
744 type TableNgramsApi = TableNgramsApiGet
745 :<|> TableNgramsApiPut
746 :<|> RecomputeScoresNgramsApiGet
747 :<|> "version" :> TableNgramsApiGetVersion
748 :<|> TableNgramsAsyncApi
749
750 type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
751 :> "async"
752 :> "charts"
753 :> "update"
754 :> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
755
756 getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
757 => NodeId
758 -> TabType
759 -> ListId
760 -> Limit
761 -> Maybe Offset
762 -> Maybe ListType
763 -> Maybe MinSize -> Maybe MaxSize
764 -> Maybe OrderBy
765 -> Maybe Text -- full text search
766 -> m (VersionedWithCount NgramsTable)
767 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
768 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
769 where
770 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
771
772
773
774 getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, CmdCommon env)
775 => NodeId
776 -> TabType
777 -> ListId
778 -> m Version
779 getTableNgramsVersion _nId _tabType listId = currentVersion listId
780
781
782
783 -- TODO: limit?
784 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
785 -- This line above looks like a waste of computation to finally get only the version.
786 -- See the comment about listNgramsChangedSince.
787
788
789 -- | Text search is deactivated for now for ngrams by doc only
790 getTableNgramsDoc :: ( HasNodeStory env err m, HasNodeError err, CmdCommon env)
791 => DocId -> TabType
792 -> ListId -> Limit -> Maybe Offset
793 -> Maybe ListType
794 -> Maybe MinSize -> Maybe MaxSize
795 -> Maybe OrderBy
796 -> Maybe Text -- full text search
797 -> m (VersionedWithCount NgramsTable)
798 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
799 ns <- selectNodesWithUsername NodeList userMaster
800 let ngramsType = ngramsTypeFromTabType tabType
801 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
802 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
803 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
804
805
806
807 apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
808 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
809 :<|> tableNgramsPut
810 :<|> scoresRecomputeTableNgrams cId
811 :<|> getTableNgramsVersion cId
812 :<|> apiNgramsAsync cId
813
814 apiNgramsTableDoc :: DocId -> ServerT TableNgramsApi (GargM Env GargError)
815 apiNgramsTableDoc dId = getTableNgramsDoc dId
816 :<|> tableNgramsPut
817 :<|> scoresRecomputeTableNgrams dId
818 :<|> getTableNgramsVersion dId
819 :<|> apiNgramsAsync dId
820
821 apiNgramsAsync :: NodeId -> ServerT TableNgramsAsyncApi (GargM Env GargError)
822 apiNgramsAsync _dId =
823 serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
824 \jHandle' -> tableNgramsPostChartsAsync i jHandle'
825
826 -- Did the given list of ngrams changed since the given version?
827 -- The returned value is versioned boolean value, meaning that one always retrieve the
828 -- latest version.
829 -- If the given version is negative then one simply receive the latest version and True.
830 -- Using this function is more precise than simply comparing the latest version number
831 -- with the local version number. Indeed there might be no change to this particular list
832 -- and still the version number has changed because of other lists.
833 --
834 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
835 -- * currentVersion: good computation, good bandwidth, bad precision.
836 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
837 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
838 listNgramsChangedSince :: HasNodeStory env err m
839 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
840 listNgramsChangedSince listId ngramsType version
841 | version < 0 =
842 Versioned <$> currentVersion listId <*> pure True
843 | otherwise =
844 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)