]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[DEPS] stack upgrade
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
25
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , putListNgrams
35 --, putListNgrams'
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsStatePatch
40 , NgramsTablePatch
41 , NgramsTableMap
42
43 , NgramsElement(..)
44 , mkNgramsElement
45 , mergeNgramsElement
46
47 , RootParent(..)
48
49 , MSet
50 , mSetFromList
51 , mSetToList
52
53 , Repo(..)
54 , r_version
55 , r_state
56 , r_history
57 , NgramsRepo
58 , NgramsRepoElement(..)
59 , saveRepo
60 , initRepo
61
62 , RepoEnv(..)
63 , renv_var
64 , renv_lock
65
66 , TabType(..)
67 , ngramsTypeFromTabType
68
69 , HasRepoVar(..)
70 , HasRepoSaver(..)
71 , HasRepo(..)
72 , RepoCmdM
73 , QueryParamR
74 , TODO
75
76 -- Internals
77 , getNgramsTableMap
78 , dumpJsonTableMap
79 , tableNgramsPull
80 , tableNgramsPut
81
82 , Version
83 , Versioned(..)
84 , currentVersion
85 , listNgramsChangedSince
86 )
87 where
88
89 import Codec.Serialise (Serialise())
90 import Control.Category ((>>>))
91 import Control.Concurrent
92 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
93 import Control.Monad.Error.Class (MonadError)
94 import Control.Monad.Reader
95 import Control.Monad.State
96 import Control.Monad.Trans.Control (MonadBaseControl)
97 import Data.Aeson hiding ((.=))
98 import Data.Aeson.TH (deriveJSON)
99 import qualified Data.Aeson.Text as DAT
100 import Data.Either (Either(..))
101 import Data.Foldable
102 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
103 import qualified Data.List as List
104 import Data.Map.Strict (Map)
105 import qualified Data.Map.Strict as Map
106 import qualified Data.Map.Strict.Patch as PM
107 import Data.Maybe (fromMaybe)
108 import Data.Monoid
109 import Data.Ord (Down(..))
110 import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..),
111 PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours,
112 MaybePatch(Mod), unMod, old, new)
113 import Data.Set (Set)
114 import qualified Data.Set as S
115 import qualified Data.Set as Set
116 import Data.Swagger hiding (version, patch)
117 import Data.Text (Text, isInfixOf, unpack)
118 import Data.Text.Lazy.IO as DTL
119 import Data.Validity
120 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
121 import Formatting (hprint, int, (%))
122 import Formatting.Clock (timeSpecs)
123 import GHC.Generics (Generic)
124 import Servant hiding (Patch)
125 import System.Clock (getTime, TimeSpec, Clock(..))
126 import System.FileLock (FileLock)
127 import System.IO (stderr)
128 import Test.QuickCheck (elements, frequency)
129 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
130
131 import Prelude (error)
132 import Protolude (maybeToEither)
133 import Gargantext.Prelude
134
135 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
136 import Gargantext.Core.Types (TODO)
137 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
138 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
139 import Gargantext.Database.Query.Table.Node.Select
140 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
141 import Gargantext.Database.Admin.Config (userMaster)
142 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
143 import Gargantext.Database.Admin.Types.Node (NodeType(..))
144 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
145 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
146 import qualified Gargantext.Core.Text as GCT
147
148 ------------------------------------------------------------------------
149 --data FacetFormat = Table | Chart
150 data TabType = Docs | Trash | MoreFav | MoreTrash
151 | Terms | Sources | Authors | Institutes
152 | Contacts
153 deriving (Generic, Enum, Bounded, Show)
154
155 instance FromHttpApiData TabType
156 where
157 parseUrlPiece "Docs" = pure Docs
158 parseUrlPiece "Trash" = pure Trash
159 parseUrlPiece "MoreFav" = pure MoreFav
160 parseUrlPiece "MoreTrash" = pure MoreTrash
161
162 parseUrlPiece "Terms" = pure Terms
163 parseUrlPiece "Sources" = pure Sources
164 parseUrlPiece "Institutes" = pure Institutes
165 parseUrlPiece "Authors" = pure Authors
166
167 parseUrlPiece "Contacts" = pure Contacts
168
169 parseUrlPiece _ = Left "Unexpected value of TabType"
170
171 instance ToParamSchema TabType
172 instance ToJSON TabType
173 instance FromJSON TabType
174 instance ToSchema TabType
175 instance Arbitrary TabType
176 where
177 arbitrary = elements [minBound .. maxBound]
178
179 newtype MSet a = MSet (Map a ())
180 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
181
182 instance ToJSON a => ToJSON (MSet a) where
183 toJSON (MSet m) = toJSON (Map.keys m)
184 toEncoding (MSet m) = toEncoding (Map.keys m)
185
186 mSetFromSet :: Set a -> MSet a
187 mSetFromSet = MSet . Map.fromSet (const ())
188
189 mSetFromList :: Ord a => [a] -> MSet a
190 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
191
192 -- mSetToSet :: Ord a => MSet a -> Set a
193 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
194 mSetToSet :: Ord a => MSet a -> Set a
195 mSetToSet = Set.fromList . mSetToList
196
197 mSetToList :: MSet a -> [a]
198 mSetToList (MSet a) = Map.keys a
199
200 instance Foldable MSet where
201 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
202
203 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
204 parseJSON = fmap mSetFromList . parseJSON
205
206 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
207 -- TODO
208 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
209
210 ------------------------------------------------------------------------
211 type NgramsTerm = Text
212
213 data RootParent = RootParent
214 { _rp_root :: NgramsTerm
215 , _rp_parent :: NgramsTerm
216 }
217 deriving (Ord, Eq, Show, Generic)
218
219 deriveJSON (unPrefix "_rp_") ''RootParent
220 makeLenses ''RootParent
221
222 data NgramsRepoElement = NgramsRepoElement
223 { _nre_size :: Int
224 , _nre_list :: ListType
225 --, _nre_root_parent :: Maybe RootParent
226 , _nre_root :: Maybe NgramsTerm
227 , _nre_parent :: Maybe NgramsTerm
228 , _nre_children :: MSet NgramsTerm
229 }
230 deriving (Ord, Eq, Show, Generic)
231
232 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
233 -- TODO
234 -- if ngrams & not size => size
235 -- drop occurrences
236
237 makeLenses ''NgramsRepoElement
238
239 instance ToSchema NgramsRepoElement where
240 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
241
242 instance Serialise (MSet NgramsTerm)
243 instance Serialise NgramsRepoElement
244
245 data NgramsElement =
246 NgramsElement { _ne_ngrams :: NgramsTerm
247 , _ne_size :: Int
248 , _ne_list :: ListType
249 , _ne_occurrences :: Int
250 , _ne_root :: Maybe NgramsTerm
251 , _ne_parent :: Maybe NgramsTerm
252 , _ne_children :: MSet NgramsTerm
253 }
254 deriving (Ord, Eq, Show, Generic)
255
256 deriveJSON (unPrefix "_ne_") ''NgramsElement
257 makeLenses ''NgramsElement
258
259 mkNgramsElement :: NgramsTerm
260 -> ListType
261 -> Maybe RootParent
262 -> MSet NgramsTerm
263 -> NgramsElement
264 mkNgramsElement ngrams list rp children =
265 NgramsElement ngrams (GCT.size ngrams) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
266
267 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
268 newNgramsElement mayList ngrams =
269 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
270
271 instance ToSchema NgramsElement where
272 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
273 instance Arbitrary NgramsElement where
274 arbitrary = elements [newNgramsElement Nothing "sport"]
275
276 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
277 ngramsElementToRepo
278 (NgramsElement { _ne_size = s
279 , _ne_list = l
280 , _ne_root = r
281 , _ne_parent = p
282 , _ne_children = c
283 }) =
284 NgramsRepoElement
285 { _nre_size = s
286 , _nre_list = l
287 , _nre_parent = p
288 , _nre_root = r
289 , _nre_children = c
290 }
291
292 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
293 ngramsElementFromRepo
294 ngrams
295 (NgramsRepoElement
296 { _nre_size = s
297 , _nre_list = l
298 , _nre_parent = p
299 , _nre_root = r
300 , _nre_children = c
301 }) =
302 NgramsElement { _ne_size = s
303 , _ne_list = l
304 , _ne_root = r
305 , _ne_parent = p
306 , _ne_children = c
307 , _ne_ngrams = ngrams
308 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
309 {-
310 -- Here we could use 0 if we want to avoid any `panic`.
311 -- It will not happen using getTableNgrams if
312 -- getOccByNgramsOnly provides a count of occurrences for
313 -- all the ngrams given.
314 -}
315 }
316
317 ------------------------------------------------------------------------
318 newtype NgramsTable = NgramsTable [NgramsElement]
319 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
320
321 type NgramsList = NgramsTable
322
323 makePrisms ''NgramsTable
324
325 -- | Question: why these repetition of Type in this instance
326 -- may you document it please ?
327 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
328 each = _NgramsTable . each
329
330 -- TODO discuss
331 -- | TODO Check N and Weight
332 {-
333 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
334 toNgramsElement ns = map toNgramsElement' ns
335 where
336 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
337 where
338 p' = case p of
339 Nothing -> Nothing
340 Just x -> lookup x mapParent
341 c' = maybe mempty identity $ lookup t mapChildren
342 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
343
344 mapParent :: Map Int Text
345 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
346
347 mapChildren :: Map Text (Set Text)
348 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
349 $ Map.fromListWith (<>)
350 $ map (first fromJust)
351 $ filter (isJust . fst)
352 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
353 -}
354
355 mockTable :: NgramsTable
356 mockTable = NgramsTable
357 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
358 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
359 , mkNgramsElement "cats" StopTerm Nothing mempty
360 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
361 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
362 , mkNgramsElement "fox" MapTerm Nothing mempty
363 , mkNgramsElement "object" CandidateTerm Nothing mempty
364 , mkNgramsElement "nothing" StopTerm Nothing mempty
365 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
366 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
367 , mkNgramsElement "moon" CandidateTerm Nothing mempty
368 , mkNgramsElement "sky" StopTerm Nothing mempty
369 ]
370 where
371 rp n = Just $ RootParent n n
372
373 instance Arbitrary NgramsTable where
374 arbitrary = pure mockTable
375
376 instance ToSchema NgramsTable
377
378 ------------------------------------------------------------------------
379 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
380 ------------------------------------------------------------------------
381 -- On the Client side:
382 --data Action = InGroup NgramsId NgramsId
383 -- | OutGroup NgramsId NgramsId
384 -- | SetListType NgramsId ListType
385
386 data PatchSet a = PatchSet
387 { _rem :: Set a
388 , _add :: Set a
389 }
390 deriving (Eq, Ord, Show, Generic)
391
392 makeLenses ''PatchSet
393 makePrisms ''PatchSet
394
395 instance ToJSON a => ToJSON (PatchSet a) where
396 toJSON = genericToJSON $ unPrefix "_"
397 toEncoding = genericToEncoding $ unPrefix "_"
398
399 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
400 parseJSON = genericParseJSON $ unPrefix "_"
401
402 {-
403 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
404 arbitrary = PatchSet <$> arbitrary <*> arbitrary
405
406 type instance Patched (PatchSet a) = Set a
407
408 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
409 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
410
411 instance Ord a => Semigroup (PatchSet a) where
412 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
413 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
414 } -- TODO Review
415
416 instance Ord a => Monoid (PatchSet a) where
417 mempty = PatchSet mempty mempty
418
419 instance Ord a => Group (PatchSet a) where
420 invert (PatchSet r a) = PatchSet a r
421
422 instance Ord a => Composable (PatchSet a) where
423 composable _ _ = undefined
424
425 instance Ord a => Action (PatchSet a) (Set a) where
426 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
427
428 instance Applicable (PatchSet a) (Set a) where
429 applicable _ _ = mempty
430
431 instance Ord a => Validity (PatchSet a) where
432 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
433
434 instance Ord a => Transformable (PatchSet a) where
435 transformable = undefined
436
437 conflicts _p _q = undefined
438
439 transformWith conflict p q = undefined conflict p q
440
441 instance ToSchema a => ToSchema (PatchSet a)
442 -}
443
444 type AddRem = Replace (Maybe ())
445
446 instance Serialise AddRem
447
448 remPatch, addPatch :: AddRem
449 remPatch = replace (Just ()) Nothing
450 addPatch = replace Nothing (Just ())
451
452 isRem :: Replace (Maybe ()) -> Bool
453 isRem = (== remPatch)
454
455 type PatchMap = PM.PatchMap
456
457
458 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
459 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
460 Transformable, Composable)
461
462 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
463 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
464
465 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
466 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
467
468 -- TODO this breaks module abstraction
469 makePrisms ''PM.PatchMap
470
471 makePrisms ''PatchMSet
472
473 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
474 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
475 where
476 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
477 f = Map.partition isRem >>> both %~ Map.keysSet
478
479 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
480 g (rems, adds) = Map.fromSet (const remPatch) rems
481 <> Map.fromSet (const addPatch) adds
482
483 instance Ord a => Action (PatchMSet a) (MSet a) where
484 act (PatchMSet p) (MSet m) = MSet $ act p m
485
486 instance Ord a => Applicable (PatchMSet a) (MSet a) where
487 applicable (PatchMSet p) (MSet m) = applicable p m
488
489 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
490 toJSON = toJSON . view _PatchMSetIso
491 toEncoding = toEncoding . view _PatchMSetIso
492
493 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
494 parseJSON = fmap (_PatchMSetIso #) . parseJSON
495
496 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
497 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
498
499 instance ToSchema a => ToSchema (PatchMSet a) where
500 -- TODO
501 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
502
503 type instance Patched (PatchMSet a) = MSet a
504
505 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
506 arbitrary = uncurry replace <$> arbitrary
507 -- If they happen to be equal then the patch is Keep.
508
509 instance ToSchema a => ToSchema (Replace a) where
510 declareNamedSchema (_ :: Proxy (Replace a)) = do
511 -- TODO Keep constructor is not supported here.
512 aSchema <- declareSchemaRef (Proxy :: Proxy a)
513 return $ NamedSchema (Just "Replace") $ mempty
514 & type_ ?~ SwaggerObject
515 & properties .~
516 InsOrdHashMap.fromList
517 [ ("old", aSchema)
518 , ("new", aSchema)
519 ]
520 & required .~ [ "old", "new" ]
521
522 data NgramsPatch
523 = NgramsPatch { _patch_children :: PatchMSet NgramsTerm
524 , _patch_list :: Replace ListType -- TODO Map UserId ListType
525 }
526 | NgramsReplace { _patch_old :: Maybe NgramsRepoElement
527 , _patch_new :: Maybe NgramsRepoElement
528 }
529 deriving (Eq, Show, Generic)
530
531 -- The JSON encoding is untagged, this is OK since the field names are disjoints and thus the encoding is unambiguous.
532 -- TODO: the empty object should be accepted and treated as mempty.
533 deriveJSON (unPrefixUntagged "_") ''NgramsPatch
534 makeLenses ''NgramsPatch
535
536 -- TODO: This instance is simplified since we should either have the fields children and/or list
537 -- or the fields old and/or new.
538 instance ToSchema NgramsPatch where
539 declareNamedSchema _ = do
540 childrenSch <- declareSchemaRef (Proxy :: Proxy (PatchMSet NgramsTerm))
541 listSch <- declareSchemaRef (Proxy :: Proxy (Replace ListType))
542 nreSch <- declareSchemaRef (Proxy :: Proxy NgramsRepoElement)
543 return $ NamedSchema (Just "NgramsPatch") $ mempty
544 & type_ ?~ SwaggerObject
545 & properties .~
546 InsOrdHashMap.fromList
547 [ ("children", childrenSch)
548 , ("list", listSch)
549 , ("old", nreSch)
550 , ("new", nreSch)
551 ]
552
553 instance Arbitrary NgramsPatch where
554 arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
555 , (1, NgramsReplace <$> arbitrary <*> arbitrary)
556 ]
557
558 instance Serialise NgramsPatch
559 instance Serialise (Replace ListType)
560
561 instance Serialise ListType
562
563 type NgramsPatchIso =
564 MaybePatch NgramsRepoElement (PairPatch (PatchMSet NgramsTerm) (Replace ListType))
565
566 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
567 _NgramsPatch = iso unwrap wrap
568 where
569 unwrap (NgramsPatch c l) = Mod $ PairPatch (c, l)
570 unwrap (NgramsReplace o n) = replace o n
571 wrap x =
572 case unMod x of
573 Just (PairPatch (c, l)) -> NgramsPatch c l
574 Nothing -> NgramsReplace (x ^? old . _Just) (x ^? new . _Just)
575
576 instance Semigroup NgramsPatch where
577 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
578
579 instance Monoid NgramsPatch where
580 mempty = _NgramsPatch # mempty
581
582 instance Validity NgramsPatch where
583 validate p = p ^. _NgramsPatch . to validate
584
585 instance Transformable NgramsPatch where
586 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
587
588 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
589
590 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
591 where
592 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
593
594 type ConflictResolutionNgramsPatch =
595 ( ConflictResolutionReplace (Maybe NgramsRepoElement)
596 , ( ConflictResolutionPatchMSet NgramsTerm
597 , ConflictResolutionReplace ListType
598 )
599 , (Bool, Bool)
600 )
601 type instance ConflictResolution NgramsPatch =
602 ConflictResolutionNgramsPatch
603
604 type PatchedNgramsPatch = Maybe NgramsRepoElement
605 type instance Patched NgramsPatch = PatchedNgramsPatch
606
607 instance Applicable (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
608 applicable (PairPatch (c, l)) n = applicable c (n ^. nre_children) <> applicable l (n ^. nre_list)
609
610 instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
611 act (PairPatch (c, l)) = (nre_children %~ act c)
612 . (nre_list %~ act l)
613
614 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
615 applicable p = applicable (p ^. _NgramsPatch)
616
617 instance Action NgramsPatch (Maybe NgramsRepoElement) where
618 act p = act (p ^. _NgramsPatch)
619
620 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
621 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
622
623 instance Serialise NgramsTablePatch
624 instance Serialise (PatchMap NgramsTerm NgramsPatch)
625
626 instance FromField NgramsTablePatch
627 where
628 fromField = fromField'
629
630 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
631 where
632 fromField = fromField'
633
634 type instance ConflictResolution NgramsTablePatch =
635 NgramsTerm -> ConflictResolutionNgramsPatch
636
637 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
638 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
639 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
640
641 makePrisms ''NgramsTablePatch
642 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
643 instance ToSchema NgramsTablePatch
644
645 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
646 applicable p = applicable (p ^. _NgramsTablePatch)
647
648 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
649 act p =
650 fmap (execState (reParentNgramsTablePatch p)) .
651 act (p ^. _NgramsTablePatch)
652
653 instance Arbitrary NgramsTablePatch where
654 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
655
656 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
657 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
658 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
659
660 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
661
662 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
663 reRootChildren root ngram = do
664 nre <- use $ at ngram
665 forOf_ (_Just . nre_children . folded) nre $ \child -> do
666 at child . _Just . nre_root ?= root
667 reRootChildren root child
668
669 reParent :: Maybe RootParent -> ReParent NgramsTerm
670 reParent rp child = do
671 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
672 . (nre_root .~ (_rp_root <$> rp))
673 )
674 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
675
676 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
677 reParentAddRem rp child p =
678 reParent (if isRem p then Nothing else Just rp) child
679
680 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
681 reParentNgramsPatch parent ngramsPatch = do
682 root_of_parent <- use (at parent . _Just . nre_root)
683 let
684 root = fromMaybe parent root_of_parent
685 rp = RootParent { _rp_root = root, _rp_parent = parent }
686 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
687 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
688
689 reParentNgramsTablePatch :: ReParent NgramsTablePatch
690 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
691 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
692
693 ------------------------------------------------------------------------
694 ------------------------------------------------------------------------
695 type Version = Int
696
697 data Versioned a = Versioned
698 { _v_version :: Version
699 , _v_data :: a
700 }
701 deriving (Generic, Show, Eq)
702 deriveJSON (unPrefix "_v_") ''Versioned
703 makeLenses ''Versioned
704 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
705 declareNamedSchema = wellNamedSchema "_v_"
706 instance Arbitrary a => Arbitrary (Versioned a) where
707 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
708
709
710 {-
711 -- TODO sequences of modifications (Patchs)
712 type NgramsIdPatch = Patch NgramsId NgramsPatch
713
714 ngramsPatch :: Int -> NgramsPatch
715 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
716
717 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
718 toEdit n p = Edit n p
719 ngramsIdPatch :: Patch NgramsId NgramsPatch
720 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
721 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
722 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
723 ]
724
725 -- applyPatchBack :: Patch -> IO Patch
726 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
727 -}
728 ------------------------------------------------------------------------
729 ------------------------------------------------------------------------
730 ------------------------------------------------------------------------
731
732 {-
733 -- TODO: Replace.old is ignored which means that if the current list
734 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
735 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
736 -- However this should not happen in non conflicting situations.
737 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
738 mkListsUpdate nt patches =
739 [ (ngramsTypeId nt, ng, listTypeId lt)
740 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
741 , lt <- patch ^.. patch_list . new
742 ]
743
744 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
745 -> NgramsType
746 -> NgramsTablePatch
747 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
748 mkChildrenGroups addOrRem nt patches =
749 [ (ngramsTypeId nt, parent, child)
750 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
751 , child <- patch ^.. patch_children . to addOrRem . folded
752 ]
753 -}
754
755 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
756 ngramsTypeFromTabType tabType =
757 let lieu = "Garg.API.Ngrams: " :: Text in
758 case tabType of
759 Sources -> TableNgrams.Sources
760 Authors -> TableNgrams.Authors
761 Institutes -> TableNgrams.Institutes
762 Terms -> TableNgrams.NgramsTerms
763 _ -> panic $ lieu <> "No Ngrams for this tab"
764 -- TODO: This `panic` would disapear with custom NgramsType.
765
766 ------------------------------------------------------------------------
767 data Repo s p = Repo
768 { _r_version :: Version
769 , _r_state :: s
770 , _r_history :: [p]
771 -- first patch in the list is the most recent
772 }
773 deriving (Generic)
774
775 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
776 parseJSON = genericParseJSON $ unPrefix "_r_"
777
778 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
779 toJSON = genericToJSON $ unPrefix "_r_"
780 toEncoding = genericToEncoding $ unPrefix "_r_"
781
782 instance (Serialise s, Serialise p) => Serialise (Repo s p)
783
784 makeLenses ''Repo
785
786 initRepo :: Monoid s => Repo s p
787 initRepo = Repo 1 mempty []
788
789 type NgramsRepo = Repo NgramsState NgramsStatePatch
790 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
791 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
792
793 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
794 instance Serialise NgramsStatePatch
795
796 initMockRepo :: NgramsRepo
797 initMockRepo = Repo 1 s []
798 where
799 s = Map.singleton TableNgrams.NgramsTerms
800 $ Map.singleton 47254
801 $ Map.fromList
802 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
803
804 data RepoEnv = RepoEnv
805 { _renv_var :: !(MVar NgramsRepo)
806 , _renv_saver :: !(IO ())
807 , _renv_lock :: !FileLock
808 }
809 deriving (Generic)
810
811 makeLenses ''RepoEnv
812
813 class HasRepoVar env where
814 repoVar :: Getter env (MVar NgramsRepo)
815
816 instance HasRepoVar (MVar NgramsRepo) where
817 repoVar = identity
818
819 class HasRepoSaver env where
820 repoSaver :: Getter env (IO ())
821
822 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
823 repoEnv :: Getter env RepoEnv
824
825 instance HasRepo RepoEnv where
826 repoEnv = identity
827
828 instance HasRepoVar RepoEnv where
829 repoVar = renv_var
830
831 instance HasRepoSaver RepoEnv where
832 repoSaver = renv_saver
833
834 type RepoCmdM env err m =
835 ( MonadReader env m
836 , MonadError err m
837 , MonadBaseControl IO m
838 , HasRepo env
839 )
840 ------------------------------------------------------------------------
841
842 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
843 => m ()
844 saveRepo = liftBase =<< view repoSaver
845
846 listTypeConflictResolution :: ListType -> ListType -> ListType
847 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
848
849 ngramsStatePatchConflictResolution
850 :: TableNgrams.NgramsType
851 -> NodeId
852 -> NgramsTerm
853 -> ConflictResolutionNgramsPatch
854 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
855 = (ours, (const ours, ours), (False, False))
856 -- (False, False) mean here that Mod has always priority.
857 -- (True, False) <- would mean priority to the left (same as ours).
858
859 -- undefined {- TODO think this through -}, listTypeConflictResolution)
860
861 -- Current state:
862 -- Insertions are not considered as patches,
863 -- they do not extend history,
864 -- they do not bump version.
865 insertNewOnly :: a -> Maybe b -> a
866 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
867 -- TODO error handling
868
869 something :: Monoid a => Maybe a -> a
870 something Nothing = mempty
871 something (Just a) = a
872
873 {- unused
874 -- TODO refactor with putListNgrams
875 copyListNgrams :: RepoCmdM env err m
876 => NodeId -> NodeId -> NgramsType
877 -> m ()
878 copyListNgrams srcListId dstListId ngramsType = do
879 var <- view repoVar
880 liftBase $ modifyMVar_ var $
881 pure . (r_state . at ngramsType %~ (Just . f . something))
882 saveRepo
883 where
884 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
885 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
886
887 -- TODO refactor with putListNgrams
888 -- The list must be non-empty!
889 -- The added ngrams must be non-existent!
890 addListNgrams :: RepoCmdM env err m
891 => NodeId -> NgramsType
892 -> [NgramsElement] -> m ()
893 addListNgrams listId ngramsType nes = do
894 var <- view repoVar
895 liftBase $ modifyMVar_ var $
896 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
897 saveRepo
898 where
899 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
900 -}
901
902 -- UNSAFE
903 rmListNgrams :: RepoCmdM env err m
904 => ListId
905 -> TableNgrams.NgramsType
906 -> m ()
907 rmListNgrams l nt = setListNgrams l nt mempty
908
909 -- | TODO: incr the Version number
910 -- && should use patch
911 -- UNSAFE
912 setListNgrams :: RepoCmdM env err m
913 => NodeId
914 -> TableNgrams.NgramsType
915 -> Map NgramsTerm NgramsRepoElement
916 -> m ()
917 setListNgrams listId ngramsType ns = do
918 var <- view repoVar
919 liftBase $ modifyMVar_ var $
920 pure . ( r_state
921 . at ngramsType %~
922 (Just .
923 (at listId .~ ( Just ns))
924 . something
925 )
926 )
927 saveRepo
928
929 -- NOTE
930 -- This is no longer part of the API.
931 -- This function is maintained for its usage in Database.Action.Flow.List.
932 -- If the given list of ngrams elements contains ngrams already in
933 -- the repo, they will be ignored.
934 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
935 => NodeId
936 -> TableNgrams.NgramsType
937 -> [NgramsElement] -> m ()
938 putListNgrams _ _ [] = pure ()
939 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
940 where
941 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
942
943 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
944 => NodeId
945 -> TableNgrams.NgramsType
946 -> Map NgramsTerm NgramsRepoElement
947 -> m ()
948 putListNgrams' nodeId ngramsType ns = do
949 -- printDebug "[putLictNgrams'] nodeId" nodeId
950 -- printDebug "[putLictNgrams'] ngramsType" ngramsType
951 -- printDebug "[putListNgrams'] ns" ns
952
953 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
954 (p0, p0_validity) = PM.singleton nodeId p1
955 (p, p_validity) = PM.singleton ngramsType p0
956 assertValid p0_validity
957 assertValid p_validity
958 {-
959 -- TODO
960 v <- currentVersion
961 q <- commitStatePatch (Versioned v p)
962 assert empty q
963 -- What if another commit comes in between?
964 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
965 -- The modifyMVar_ would test the patch with applicable first.
966 -- If valid the rest would be atomic and no merge is required.
967 -}
968 var <- view repoVar
969 liftBase $ modifyMVar_ var $ \r -> do
970 pure $ r & r_version +~ 1
971 & r_history %~ (p :)
972 & r_state . at ngramsType %~
973 (Just .
974 (at nodeId %~
975 ( Just
976 . (<> ns)
977 . something
978 )
979 )
980 . something
981 )
982 saveRepo
983
984
985 currentVersion :: RepoCmdM env err m
986 => m Version
987 currentVersion = do
988 var <- view repoVar
989 r <- liftBase $ readMVar var
990 pure $ r ^. r_version
991
992
993 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
994 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
995 commitStatePatch (Versioned p_version p) = do
996 var <- view repoVar
997 vq' <- liftBase $ modifyMVar var $ \r -> do
998 let
999 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1000 (p', q') = transformWith ngramsStatePatchConflictResolution p q
1001 r' = r & r_version +~ 1
1002 & r_state %~ act p'
1003 & r_history %~ (p' :)
1004 {-
1005 -- Ideally we would like to check these properties. However:
1006 -- * They should be checked only to debug the code. The client data
1007 -- should be able to trigger these.
1008 -- * What kind of error should they throw (we are in IO here)?
1009 -- * Should we keep modifyMVar?
1010 -- * Should we throw the validation in an Exception, catch it around
1011 -- modifyMVar and throw it back as an Error?
1012 assertValid $ transformable p q
1013 assertValid $ applicable p' (r ^. r_state)
1014 -}
1015 pure (r', Versioned (r' ^. r_version) q')
1016
1017 saveRepo
1018 pure vq'
1019
1020 -- This is a special case of tableNgramsPut where the input patch is empty.
1021 tableNgramsPull :: RepoCmdM env err m
1022 => ListId
1023 -> TableNgrams.NgramsType
1024 -> Version
1025 -> m (Versioned NgramsTablePatch)
1026 tableNgramsPull listId ngramsType p_version = do
1027 var <- view repoVar
1028 r <- liftBase $ readMVar var
1029
1030 let
1031 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1032 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
1033
1034 pure (Versioned (r ^. r_version) q_table)
1035
1036 -- Apply the given patch to the DB and returns the patch to be applied on the
1037 -- client.
1038 -- TODO-ACCESS check
1039 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1040 => TabType -> ListId
1041 -> Versioned NgramsTablePatch
1042 -> m (Versioned NgramsTablePatch)
1043 tableNgramsPut tabType listId (Versioned p_version p_table)
1044 | p_table == mempty = do
1045 let ngramsType = ngramsTypeFromTabType tabType
1046 tableNgramsPull listId ngramsType p_version
1047
1048 | otherwise = do
1049 let ngramsType = ngramsTypeFromTabType tabType
1050 (p0, p0_validity) = PM.singleton listId p_table
1051 (p, p_validity) = PM.singleton ngramsType p0
1052
1053 assertValid p0_validity
1054 assertValid p_validity
1055
1056 commitStatePatch (Versioned p_version p)
1057 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
1058
1059 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1060 mergeNgramsElement _neOld neNew = neNew
1061 {-
1062 { _ne_list :: ListType
1063 If we merge the parents/children we can potentially create cycles!
1064 , _ne_parent :: Maybe NgramsTerm
1065 , _ne_children :: MSet NgramsTerm
1066 }
1067 -}
1068
1069 getNgramsTableMap :: RepoCmdM env err m
1070 => NodeId
1071 -> TableNgrams.NgramsType
1072 -> m (Versioned NgramsTableMap)
1073 getNgramsTableMap nodeId ngramsType = do
1074 v <- view repoVar
1075 repo <- liftBase $ readMVar v
1076 pure $ Versioned (repo ^. r_version)
1077 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1078
1079 dumpJsonTableMap :: RepoCmdM env err m
1080 => Text
1081 -> NodeId
1082 -> TableNgrams.NgramsType
1083 -> m ()
1084 dumpJsonTableMap fpath nodeId ngramsType = do
1085 m <- getNgramsTableMap nodeId ngramsType
1086 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
1087 pure ()
1088
1089 type MinSize = Int
1090 type MaxSize = Int
1091
1092 -- | TODO Errors management
1093 -- TODO: polymorphic for Annuaire or Corpus or ...
1094 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1095 -- TODO: should take only one ListId
1096
1097 getTime' :: MonadBase IO m => m TimeSpec
1098 getTime' = liftBase $ getTime ProcessCPUTime
1099
1100
1101 getTableNgrams :: forall env err m.
1102 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1103 => NodeType -> NodeId -> TabType
1104 -> ListId -> Limit -> Maybe Offset
1105 -> Maybe ListType
1106 -> Maybe MinSize -> Maybe MaxSize
1107 -> Maybe OrderBy
1108 -> (NgramsTerm -> Bool)
1109 -> m (Versioned NgramsTable)
1110 getTableNgrams _nType nId tabType listId limit_ offset
1111 listType minSize maxSize orderBy searchQuery = do
1112
1113 t0 <- getTime'
1114 -- lIds <- selectNodesWithUsername NodeList userMaster
1115 let
1116 ngramsType = ngramsTypeFromTabType tabType
1117 offset' = maybe 0 identity offset
1118 listType' = maybe (const True) (==) listType
1119 minSize' = maybe (const True) (<=) minSize
1120 maxSize' = maybe (const True) (>=) maxSize
1121
1122 selected_node n = minSize' s
1123 && maxSize' s
1124 && searchQuery (n ^. ne_ngrams)
1125 && listType' (n ^. ne_list)
1126 where
1127 s = n ^. ne_size
1128
1129 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1130
1131 ---------------------------------------
1132 sortOnOrder Nothing = identity
1133 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1134 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1135 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1136 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1137
1138 ---------------------------------------
1139 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1140 selectAndPaginate tableMap = roots <> inners
1141 where
1142 list = tableMap ^.. each
1143 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1144 (ne ^. ne_root)
1145 selected_nodes = list & take limit_
1146 . drop offset'
1147 . filter selected_node
1148 . sortOnOrder orderBy
1149 roots = rootOf <$> selected_nodes
1150 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1151 inners = list & filter (selected_inner rootsSet)
1152
1153 ---------------------------------------
1154 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1155 setScores False table = pure table
1156 setScores True table = do
1157 let ngrams_terms = (table ^.. each . ne_ngrams)
1158 t1 <- getTime'
1159 occurrences <- getOccByNgramsOnlyFast' nId
1160 listId
1161 ngramsType
1162 ngrams_terms
1163 t2 <- getTime'
1164 liftBase $ hprint stderr
1165 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1166 (length ngrams_terms) t1 t2
1167 {-
1168 occurrences <- getOccByNgramsOnlySlow nType nId
1169 (lIds <> [listId])
1170 ngramsType
1171 ngrams_terms
1172 -}
1173 let
1174 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1175
1176 pure $ table & each %~ setOcc
1177 ---------------------------------------
1178
1179 -- lists <- catMaybes <$> listsWith userMaster
1180 -- trace (show lists) $
1181 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1182
1183 let scoresNeeded = needsScores orderBy
1184 tableMap1 <- getNgramsTableMap listId ngramsType
1185 t1 <- getTime'
1186 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1187 . Map.mapWithKey ngramsElementFromRepo
1188 t2 <- getTime'
1189 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1190 . setScores (not scoresNeeded)
1191 . selectAndPaginate
1192 t3 <- getTime'
1193 liftBase $ hprint stderr
1194 ("getTableNgrams total=" % timeSpecs
1195 % " map1=" % timeSpecs
1196 % " map2=" % timeSpecs
1197 % " map3=" % timeSpecs
1198 % " sql=" % (if scoresNeeded then "map2" else "map3")
1199 % "\n"
1200 ) t0 t3 t0 t1 t1 t2 t2 t3
1201 pure tableMap3
1202
1203
1204 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1205 scoresRecomputeTableNgrams nId tabType listId = do
1206 tableMap <- getNgramsTableMap listId ngramsType
1207 _ <- tableMap & v_data %%~ setScores
1208 . Map.mapWithKey ngramsElementFromRepo
1209
1210 pure $ 1
1211 where
1212 ngramsType = ngramsTypeFromTabType tabType
1213
1214 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1215 setScores table = do
1216 let ngrams_terms = (table ^.. each . ne_ngrams)
1217 occurrences <- getOccByNgramsOnlyFast' nId
1218 listId
1219 ngramsType
1220 ngrams_terms
1221 let
1222 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1223
1224 pure $ table & each %~ setOcc
1225
1226
1227
1228 -- APIs
1229
1230 -- TODO: find a better place for the code above, All APIs stay here
1231 type QueryParamR = QueryParam' '[Required, Strict]
1232
1233 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1234 deriving (Generic, Enum, Bounded, Read, Show)
1235
1236 instance FromHttpApiData OrderBy
1237 where
1238 parseUrlPiece "TermAsc" = pure TermAsc
1239 parseUrlPiece "TermDesc" = pure TermDesc
1240 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1241 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1242 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1243
1244
1245 instance ToParamSchema OrderBy
1246 instance FromJSON OrderBy
1247 instance ToJSON OrderBy
1248 instance ToSchema OrderBy
1249 instance Arbitrary OrderBy
1250 where
1251 arbitrary = elements [minBound..maxBound]
1252
1253 needsScores :: Maybe OrderBy -> Bool
1254 needsScores (Just ScoreAsc) = True
1255 needsScores (Just ScoreDesc) = True
1256 needsScores _ = False
1257
1258 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1259 :> QueryParamR "ngramsType" TabType
1260 :> QueryParamR "list" ListId
1261 :> QueryParamR "limit" Limit
1262 :> QueryParam "offset" Offset
1263 :> QueryParam "listType" ListType
1264 :> QueryParam "minTermSize" MinSize
1265 :> QueryParam "maxTermSize" MaxSize
1266 :> QueryParam "orderBy" OrderBy
1267 :> QueryParam "search" Text
1268 :> Get '[JSON] (Versioned NgramsTable)
1269
1270 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1271 :> QueryParamR "ngramsType" TabType
1272 :> QueryParamR "list" ListId
1273 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1274 :> Put '[JSON] (Versioned NgramsTablePatch)
1275
1276 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1277 :> QueryParamR "ngramsType" TabType
1278 :> QueryParamR "list" ListId
1279 :> "recompute" :> Post '[JSON] Int
1280
1281 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1282 :> QueryParamR "ngramsType" TabType
1283 :> QueryParamR "list" ListId
1284 :> Get '[JSON] Version
1285
1286 type TableNgramsApi = TableNgramsApiGet
1287 :<|> TableNgramsApiPut
1288 :<|> RecomputeScoresNgramsApiGet
1289 :<|> "version" :> TableNgramsApiGetVersion
1290
1291 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1292 => NodeId
1293 -> TabType
1294 -> ListId
1295 -> Limit
1296 -> Maybe Offset
1297 -> Maybe ListType
1298 -> Maybe MinSize -> Maybe MaxSize
1299 -> Maybe OrderBy
1300 -> Maybe Text -- full text search
1301 -> m (Versioned NgramsTable)
1302 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1303 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1304 where
1305 searchQuery = maybe (const True) isInfixOf mt
1306
1307 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1308 => NodeId
1309 -> TabType
1310 -> ListId
1311 -> m Version
1312 getTableNgramsVersion _nId _tabType _listId = currentVersion
1313 -- TODO: limit?
1314 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1315 -- This line above looks like a waste of computation to finally get only the version.
1316 -- See the comment about listNgramsChangedSince.
1317
1318
1319 -- | Text search is deactivated for now for ngrams by doc only
1320 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1321 => DocId -> TabType
1322 -> ListId -> Limit -> Maybe Offset
1323 -> Maybe ListType
1324 -> Maybe MinSize -> Maybe MaxSize
1325 -> Maybe OrderBy
1326 -> Maybe Text -- full text search
1327 -> m (Versioned NgramsTable)
1328 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1329 ns <- selectNodesWithUsername NodeList userMaster
1330 let ngramsType = ngramsTypeFromTabType tabType
1331 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1332 let searchQuery = flip S.member (S.fromList ngs)
1333 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1334
1335
1336
1337 apiNgramsTableCorpus :: ( RepoCmdM env err m
1338 , HasNodeError err
1339 , HasInvalidError err
1340 , HasConnectionPool env
1341 , HasConfig env
1342 )
1343 => NodeId -> ServerT TableNgramsApi m
1344 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1345 :<|> tableNgramsPut
1346 :<|> scoresRecomputeTableNgrams cId
1347 :<|> getTableNgramsVersion cId
1348
1349 apiNgramsTableDoc :: ( RepoCmdM env err m
1350 , HasNodeError err
1351 , HasInvalidError err
1352 , HasConnectionPool env
1353 , HasConfig env
1354 )
1355 => DocId -> ServerT TableNgramsApi m
1356 apiNgramsTableDoc dId = getTableNgramsDoc dId
1357 :<|> tableNgramsPut
1358 :<|> scoresRecomputeTableNgrams dId
1359 :<|> getTableNgramsVersion dId
1360 -- > index all the corpus accordingly (TODO AD)
1361
1362 -- Did the given list of ngrams changed since the given version?
1363 -- The returned value is versioned boolean value, meaning that one always retrieve the
1364 -- latest version.
1365 -- If the given version is negative then one simply receive the latest version and True.
1366 -- Using this function is more precise than simply comparing the latest version number
1367 -- with the local version number. Indeed there might be no change to this particular list
1368 -- and still the version number has changed because of other lists.
1369 --
1370 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
1371 -- * currentVersion: good computation, good bandwidth, bad precision.
1372 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
1373 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
1374 listNgramsChangedSince :: RepoCmdM env err m
1375 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1376 listNgramsChangedSince listId ngramsType version
1377 | version < 0 =
1378 Versioned <$> currentVersion <*> pure True
1379 | otherwise =
1380 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1381
1382 -- Instances
1383 instance Arbitrary NgramsRepoElement where
1384 arbitrary = elements $ map ngramsElementToRepo ns
1385 where
1386 NgramsTable ns = mockTable
1387
1388 --{-
1389 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1390 where
1391 parseUrlPiece x = maybeToEither x (decode $ cs x)