]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[DOC] fix haddock errors
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
25
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , putListNgrams
35 --, putListNgrams'
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsStatePatch
40 , NgramsTablePatch
41 , NgramsTableMap
42
43 , NgramsElement(..)
44 , mkNgramsElement
45 , mergeNgramsElement
46
47 , RootParent(..)
48
49 , MSet
50 , mSetFromList
51 , mSetToList
52
53 , Repo(..)
54 , r_version
55 , r_state
56 , r_history
57 , NgramsRepo
58 , NgramsRepoElement(..)
59 , saveRepo
60 , initRepo
61
62 , RepoEnv(..)
63 , renv_var
64 , renv_lock
65
66 , TabType(..)
67 , ngramsTypeFromTabType
68
69 , HasRepoVar(..)
70 , HasRepoSaver(..)
71 , HasRepo(..)
72 , RepoCmdM
73 , QueryParamR
74 , TODO
75
76 -- Internals
77 , getNgramsTableMap
78 , dumpJsonTableMap
79 , tableNgramsPull
80 , tableNgramsPut
81
82 , Version
83 , Versioned(..)
84 , currentVersion
85 , listNgramsChangedSince
86 )
87 where
88
89 import Codec.Serialise (Serialise())
90 import Control.Category ((>>>))
91 import Control.Concurrent
92 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
93 import Control.Monad.Base (MonadBase, liftBase)
94 import Control.Monad.Error.Class (MonadError)
95 import Control.Monad.Reader
96 import Control.Monad.State
97 import Control.Monad.Trans.Control (MonadBaseControl)
98 import Data.Aeson hiding ((.=))
99 import Data.Aeson.TH (deriveJSON)
100 import qualified Data.Aeson.Text as DAT
101 import Data.Either (Either(..))
102 import Data.Foldable
103 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
104 import qualified Data.List as List
105 import Data.Map.Strict (Map)
106 import qualified Data.Map.Strict as Map
107 import qualified Data.Map.Strict.Patch as PM
108 import Data.Maybe (fromMaybe)
109 import Data.Monoid
110 import Data.Ord (Down(..))
111 import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..),
112 PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours,
113 MaybePatch(Mod), unMod, old, new)
114 import Data.Set (Set)
115 import qualified Data.Set as S
116 import qualified Data.Set as Set
117 import Data.Swagger hiding (version, patch)
118 import Data.Text (Text, isInfixOf, unpack)
119 import Data.Text.Lazy.IO as DTL
120 import Data.Validity
121 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
122 import Formatting (hprint, int, (%))
123 import Formatting.Clock (timeSpecs)
124 import GHC.Generics (Generic)
125 import Servant hiding (Patch)
126 import System.Clock (getTime, TimeSpec, Clock(..))
127 import System.FileLock (FileLock)
128 import System.IO (stderr)
129 import Test.QuickCheck (elements, frequency)
130 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
131
132 import Prelude (error)
133 import Protolude (maybeToEither)
134 import Gargantext.Prelude
135
136 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
137 import Gargantext.Core.Types (TODO)
138 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
139 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
140 import Gargantext.Database.Query.Table.Node.Select
141 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
142 import Gargantext.Database.Admin.Config (userMaster)
143 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
144 import Gargantext.Database.Admin.Types.Node (NodeType(..))
145 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
146 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
147 import qualified Gargantext.Core.Text as GCT
148
149 ------------------------------------------------------------------------
150 --data FacetFormat = Table | Chart
151 data TabType = Docs | Trash | MoreFav | MoreTrash
152 | Terms | Sources | Authors | Institutes
153 | Contacts
154 deriving (Generic, Enum, Bounded, Show)
155
156 instance FromHttpApiData TabType
157 where
158 parseUrlPiece "Docs" = pure Docs
159 parseUrlPiece "Trash" = pure Trash
160 parseUrlPiece "MoreFav" = pure MoreFav
161 parseUrlPiece "MoreTrash" = pure MoreTrash
162
163 parseUrlPiece "Terms" = pure Terms
164 parseUrlPiece "Sources" = pure Sources
165 parseUrlPiece "Institutes" = pure Institutes
166 parseUrlPiece "Authors" = pure Authors
167
168 parseUrlPiece "Contacts" = pure Contacts
169
170 parseUrlPiece _ = Left "Unexpected value of TabType"
171
172 instance ToParamSchema TabType
173 instance ToJSON TabType
174 instance FromJSON TabType
175 instance ToSchema TabType
176 instance Arbitrary TabType
177 where
178 arbitrary = elements [minBound .. maxBound]
179
180 newtype MSet a = MSet (Map a ())
181 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
182
183 instance ToJSON a => ToJSON (MSet a) where
184 toJSON (MSet m) = toJSON (Map.keys m)
185 toEncoding (MSet m) = toEncoding (Map.keys m)
186
187 mSetFromSet :: Set a -> MSet a
188 mSetFromSet = MSet . Map.fromSet (const ())
189
190 mSetFromList :: Ord a => [a] -> MSet a
191 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
192
193 -- mSetToSet :: Ord a => MSet a -> Set a
194 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
195 mSetToSet :: Ord a => MSet a -> Set a
196 mSetToSet = Set.fromList . mSetToList
197
198 mSetToList :: MSet a -> [a]
199 mSetToList (MSet a) = Map.keys a
200
201 instance Foldable MSet where
202 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
203
204 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
205 parseJSON = fmap mSetFromList . parseJSON
206
207 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
208 -- TODO
209 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
210
211 ------------------------------------------------------------------------
212 type NgramsTerm = Text
213
214 data RootParent = RootParent
215 { _rp_root :: NgramsTerm
216 , _rp_parent :: NgramsTerm
217 }
218 deriving (Ord, Eq, Show, Generic)
219
220 deriveJSON (unPrefix "_rp_") ''RootParent
221 makeLenses ''RootParent
222
223 data NgramsRepoElement = NgramsRepoElement
224 { _nre_size :: Int
225 , _nre_list :: ListType
226 --, _nre_root_parent :: Maybe RootParent
227 , _nre_root :: Maybe NgramsTerm
228 , _nre_parent :: Maybe NgramsTerm
229 , _nre_children :: MSet NgramsTerm
230 }
231 deriving (Ord, Eq, Show, Generic)
232
233 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
234 -- TODO
235 -- if ngrams & not size => size
236 -- drop occurrences
237
238 makeLenses ''NgramsRepoElement
239
240 instance ToSchema NgramsRepoElement where
241 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
242
243 instance Serialise (MSet NgramsTerm)
244 instance Serialise NgramsRepoElement
245
246 data NgramsElement =
247 NgramsElement { _ne_ngrams :: NgramsTerm
248 , _ne_size :: Int
249 , _ne_list :: ListType
250 , _ne_occurrences :: Int
251 , _ne_root :: Maybe NgramsTerm
252 , _ne_parent :: Maybe NgramsTerm
253 , _ne_children :: MSet NgramsTerm
254 }
255 deriving (Ord, Eq, Show, Generic)
256
257 deriveJSON (unPrefix "_ne_") ''NgramsElement
258 makeLenses ''NgramsElement
259
260 mkNgramsElement :: NgramsTerm
261 -> ListType
262 -> Maybe RootParent
263 -> MSet NgramsTerm
264 -> NgramsElement
265 mkNgramsElement ngrams list rp children =
266 NgramsElement ngrams (GCT.size ngrams) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
267
268 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
269 newNgramsElement mayList ngrams =
270 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
271
272 instance ToSchema NgramsElement where
273 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
274 instance Arbitrary NgramsElement where
275 arbitrary = elements [newNgramsElement Nothing "sport"]
276
277 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
278 ngramsElementToRepo
279 (NgramsElement { _ne_size = s
280 , _ne_list = l
281 , _ne_root = r
282 , _ne_parent = p
283 , _ne_children = c
284 }) =
285 NgramsRepoElement
286 { _nre_size = s
287 , _nre_list = l
288 , _nre_parent = p
289 , _nre_root = r
290 , _nre_children = c
291 }
292
293 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
294 ngramsElementFromRepo
295 ngrams
296 (NgramsRepoElement
297 { _nre_size = s
298 , _nre_list = l
299 , _nre_parent = p
300 , _nre_root = r
301 , _nre_children = c
302 }) =
303 NgramsElement { _ne_size = s
304 , _ne_list = l
305 , _ne_root = r
306 , _ne_parent = p
307 , _ne_children = c
308 , _ne_ngrams = ngrams
309 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
310 {-
311 -- Here we could use 0 if we want to avoid any `panic`.
312 -- It will not happen using getTableNgrams if
313 -- getOccByNgramsOnly provides a count of occurrences for
314 -- all the ngrams given.
315 -}
316 }
317
318 ------------------------------------------------------------------------
319 newtype NgramsTable = NgramsTable [NgramsElement]
320 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
321
322 type NgramsList = NgramsTable
323
324 makePrisms ''NgramsTable
325
326 -- | Question: why these repetition of Type in this instance
327 -- may you document it please ?
328 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
329 each = _NgramsTable . each
330
331 -- TODO discuss
332 -- | TODO Check N and Weight
333 {-
334 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
335 toNgramsElement ns = map toNgramsElement' ns
336 where
337 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
338 where
339 p' = case p of
340 Nothing -> Nothing
341 Just x -> lookup x mapParent
342 c' = maybe mempty identity $ lookup t mapChildren
343 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
344
345 mapParent :: Map Int Text
346 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
347
348 mapChildren :: Map Text (Set Text)
349 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
350 $ Map.fromListWith (<>)
351 $ map (first fromJust)
352 $ filter (isJust . fst)
353 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
354 -}
355
356 mockTable :: NgramsTable
357 mockTable = NgramsTable
358 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
359 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
360 , mkNgramsElement "cats" StopTerm Nothing mempty
361 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
362 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
363 , mkNgramsElement "fox" MapTerm Nothing mempty
364 , mkNgramsElement "object" CandidateTerm Nothing mempty
365 , mkNgramsElement "nothing" StopTerm Nothing mempty
366 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
367 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
368 , mkNgramsElement "moon" CandidateTerm Nothing mempty
369 , mkNgramsElement "sky" StopTerm Nothing mempty
370 ]
371 where
372 rp n = Just $ RootParent n n
373
374 instance Arbitrary NgramsTable where
375 arbitrary = pure mockTable
376
377 instance ToSchema NgramsTable
378
379 ------------------------------------------------------------------------
380 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
381 ------------------------------------------------------------------------
382 -- On the Client side:
383 --data Action = InGroup NgramsId NgramsId
384 -- | OutGroup NgramsId NgramsId
385 -- | SetListType NgramsId ListType
386
387 data PatchSet a = PatchSet
388 { _rem :: Set a
389 , _add :: Set a
390 }
391 deriving (Eq, Ord, Show, Generic)
392
393 makeLenses ''PatchSet
394 makePrisms ''PatchSet
395
396 instance ToJSON a => ToJSON (PatchSet a) where
397 toJSON = genericToJSON $ unPrefix "_"
398 toEncoding = genericToEncoding $ unPrefix "_"
399
400 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
401 parseJSON = genericParseJSON $ unPrefix "_"
402
403 {-
404 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
405 arbitrary = PatchSet <$> arbitrary <*> arbitrary
406
407 type instance Patched (PatchSet a) = Set a
408
409 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
410 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
411
412 instance Ord a => Semigroup (PatchSet a) where
413 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
414 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
415 } -- TODO Review
416
417 instance Ord a => Monoid (PatchSet a) where
418 mempty = PatchSet mempty mempty
419
420 instance Ord a => Group (PatchSet a) where
421 invert (PatchSet r a) = PatchSet a r
422
423 instance Ord a => Composable (PatchSet a) where
424 composable _ _ = undefined
425
426 instance Ord a => Action (PatchSet a) (Set a) where
427 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
428
429 instance Applicable (PatchSet a) (Set a) where
430 applicable _ _ = mempty
431
432 instance Ord a => Validity (PatchSet a) where
433 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
434
435 instance Ord a => Transformable (PatchSet a) where
436 transformable = undefined
437
438 conflicts _p _q = undefined
439
440 transformWith conflict p q = undefined conflict p q
441
442 instance ToSchema a => ToSchema (PatchSet a)
443 -}
444
445 type AddRem = Replace (Maybe ())
446
447 instance Serialise AddRem
448
449 remPatch, addPatch :: AddRem
450 remPatch = replace (Just ()) Nothing
451 addPatch = replace Nothing (Just ())
452
453 isRem :: Replace (Maybe ()) -> Bool
454 isRem = (== remPatch)
455
456 type PatchMap = PM.PatchMap
457
458
459 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
460 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
461 Transformable, Composable)
462
463 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
464 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
465
466 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
467 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
468
469 -- TODO this breaks module abstraction
470 makePrisms ''PM.PatchMap
471
472 makePrisms ''PatchMSet
473
474 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
475 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
476 where
477 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
478 f = Map.partition isRem >>> both %~ Map.keysSet
479
480 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
481 g (rems, adds) = Map.fromSet (const remPatch) rems
482 <> Map.fromSet (const addPatch) adds
483
484 instance Ord a => Action (PatchMSet a) (MSet a) where
485 act (PatchMSet p) (MSet m) = MSet $ act p m
486
487 instance Ord a => Applicable (PatchMSet a) (MSet a) where
488 applicable (PatchMSet p) (MSet m) = applicable p m
489
490 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
491 toJSON = toJSON . view _PatchMSetIso
492 toEncoding = toEncoding . view _PatchMSetIso
493
494 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
495 parseJSON = fmap (_PatchMSetIso #) . parseJSON
496
497 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
498 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
499
500 instance ToSchema a => ToSchema (PatchMSet a) where
501 -- TODO
502 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
503
504 type instance Patched (PatchMSet a) = MSet a
505
506 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
507 arbitrary = uncurry replace <$> arbitrary
508 -- If they happen to be equal then the patch is Keep.
509
510 instance ToSchema a => ToSchema (Replace a) where
511 declareNamedSchema (_ :: Proxy (Replace a)) = do
512 -- TODO Keep constructor is not supported here.
513 aSchema <- declareSchemaRef (Proxy :: Proxy a)
514 return $ NamedSchema (Just "Replace") $ mempty
515 & type_ ?~ SwaggerObject
516 & properties .~
517 InsOrdHashMap.fromList
518 [ ("old", aSchema)
519 , ("new", aSchema)
520 ]
521 & required .~ [ "old", "new" ]
522
523 data NgramsPatch
524 = NgramsPatch { _patch_children :: PatchMSet NgramsTerm
525 , _patch_list :: Replace ListType -- TODO Map UserId ListType
526 }
527 | NgramsReplace { _patch_old :: Maybe NgramsRepoElement
528 , _patch_new :: Maybe NgramsRepoElement
529 }
530 deriving (Eq, Show, Generic)
531
532 -- The JSON encoding is untagged, this is OK since the field names are disjoints and thus the encoding is unambiguous.
533 -- TODO: the empty object should be accepted and treated as mempty.
534 deriveJSON (unPrefixUntagged "_") ''NgramsPatch
535 makeLenses ''NgramsPatch
536
537 -- TODO: This instance is simplified since we should either have the fields children and/or list
538 -- or the fields old and/or new.
539 instance ToSchema NgramsPatch where
540 declareNamedSchema _ = do
541 childrenSch <- declareSchemaRef (Proxy :: Proxy (PatchMSet NgramsTerm))
542 listSch <- declareSchemaRef (Proxy :: Proxy (Replace ListType))
543 nreSch <- declareSchemaRef (Proxy :: Proxy NgramsRepoElement)
544 return $ NamedSchema (Just "NgramsPatch") $ mempty
545 & type_ ?~ SwaggerObject
546 & properties .~
547 InsOrdHashMap.fromList
548 [ ("children", childrenSch)
549 , ("list", listSch)
550 , ("old", nreSch)
551 , ("new", nreSch)
552 ]
553
554 instance Arbitrary NgramsPatch where
555 arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
556 , (1, NgramsReplace <$> arbitrary <*> arbitrary)
557 ]
558
559 instance Serialise NgramsPatch
560 instance Serialise (Replace ListType)
561
562 instance Serialise ListType
563
564 type NgramsPatchIso =
565 MaybePatch NgramsRepoElement (PairPatch (PatchMSet NgramsTerm) (Replace ListType))
566
567 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
568 _NgramsPatch = iso unwrap wrap
569 where
570 unwrap (NgramsPatch c l) = Mod $ PairPatch (c, l)
571 unwrap (NgramsReplace o n) = replace o n
572 wrap x =
573 case unMod x of
574 Just (PairPatch (c, l)) -> NgramsPatch c l
575 Nothing -> NgramsReplace (x ^? old . _Just) (x ^? new . _Just)
576
577 instance Semigroup NgramsPatch where
578 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
579
580 instance Monoid NgramsPatch where
581 mempty = _NgramsPatch # mempty
582
583 instance Validity NgramsPatch where
584 validate p = p ^. _NgramsPatch . to validate
585
586 instance Transformable NgramsPatch where
587 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
588
589 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
590
591 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
592 where
593 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
594
595 type ConflictResolutionNgramsPatch =
596 ( ConflictResolutionReplace (Maybe NgramsRepoElement)
597 , ( ConflictResolutionPatchMSet NgramsTerm
598 , ConflictResolutionReplace ListType
599 )
600 , (Bool, Bool)
601 )
602 type instance ConflictResolution NgramsPatch =
603 ConflictResolutionNgramsPatch
604
605 type PatchedNgramsPatch = Maybe NgramsRepoElement
606 type instance Patched NgramsPatch = PatchedNgramsPatch
607
608 instance Applicable (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
609 applicable (PairPatch (c, l)) n = applicable c (n ^. nre_children) <> applicable l (n ^. nre_list)
610
611 instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
612 act (PairPatch (c, l)) = (nre_children %~ act c)
613 . (nre_list %~ act l)
614
615 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
616 applicable p = applicable (p ^. _NgramsPatch)
617
618 instance Action NgramsPatch (Maybe NgramsRepoElement) where
619 act p = act (p ^. _NgramsPatch)
620
621 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
622 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
623
624 instance Serialise NgramsTablePatch
625 instance Serialise (PatchMap NgramsTerm NgramsPatch)
626
627 instance FromField NgramsTablePatch
628 where
629 fromField = fromField'
630
631 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
632 where
633 fromField = fromField'
634
635 type instance ConflictResolution NgramsTablePatch =
636 NgramsTerm -> ConflictResolutionNgramsPatch
637
638 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
639 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
640 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
641
642 makePrisms ''NgramsTablePatch
643 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
644 instance ToSchema NgramsTablePatch
645
646 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
647 applicable p = applicable (p ^. _NgramsTablePatch)
648
649 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
650 act p =
651 fmap (execState (reParentNgramsTablePatch p)) .
652 act (p ^. _NgramsTablePatch)
653
654 instance Arbitrary NgramsTablePatch where
655 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
656
657 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
658 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
659 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
660
661 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
662
663 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
664 reRootChildren root ngram = do
665 nre <- use $ at ngram
666 forOf_ (_Just . nre_children . folded) nre $ \child -> do
667 at child . _Just . nre_root ?= root
668 reRootChildren root child
669
670 reParent :: Maybe RootParent -> ReParent NgramsTerm
671 reParent rp child = do
672 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
673 . (nre_root .~ (_rp_root <$> rp))
674 )
675 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
676
677 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
678 reParentAddRem rp child p =
679 reParent (if isRem p then Nothing else Just rp) child
680
681 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
682 reParentNgramsPatch parent ngramsPatch = do
683 root_of_parent <- use (at parent . _Just . nre_root)
684 let
685 root = fromMaybe parent root_of_parent
686 rp = RootParent { _rp_root = root, _rp_parent = parent }
687 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
688 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
689
690 reParentNgramsTablePatch :: ReParent NgramsTablePatch
691 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
692 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
693
694 ------------------------------------------------------------------------
695 ------------------------------------------------------------------------
696 type Version = Int
697
698 data Versioned a = Versioned
699 { _v_version :: Version
700 , _v_data :: a
701 }
702 deriving (Generic, Show, Eq)
703 deriveJSON (unPrefix "_v_") ''Versioned
704 makeLenses ''Versioned
705 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
706 declareNamedSchema = wellNamedSchema "_v_"
707 instance Arbitrary a => Arbitrary (Versioned a) where
708 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
709
710
711 {-
712 -- TODO sequences of modifications (Patchs)
713 type NgramsIdPatch = Patch NgramsId NgramsPatch
714
715 ngramsPatch :: Int -> NgramsPatch
716 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
717
718 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
719 toEdit n p = Edit n p
720 ngramsIdPatch :: Patch NgramsId NgramsPatch
721 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
722 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
723 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
724 ]
725
726 -- applyPatchBack :: Patch -> IO Patch
727 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
728 -}
729 ------------------------------------------------------------------------
730 ------------------------------------------------------------------------
731 ------------------------------------------------------------------------
732
733 {-
734 -- TODO: Replace.old is ignored which means that if the current list
735 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
736 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
737 -- However this should not happen in non conflicting situations.
738 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
739 mkListsUpdate nt patches =
740 [ (ngramsTypeId nt, ng, listTypeId lt)
741 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
742 , lt <- patch ^.. patch_list . new
743 ]
744
745 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
746 -> NgramsType
747 -> NgramsTablePatch
748 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
749 mkChildrenGroups addOrRem nt patches =
750 [ (ngramsTypeId nt, parent, child)
751 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
752 , child <- patch ^.. patch_children . to addOrRem . folded
753 ]
754 -}
755
756 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
757 ngramsTypeFromTabType tabType =
758 let lieu = "Garg.API.Ngrams: " :: Text in
759 case tabType of
760 Sources -> TableNgrams.Sources
761 Authors -> TableNgrams.Authors
762 Institutes -> TableNgrams.Institutes
763 Terms -> TableNgrams.NgramsTerms
764 _ -> panic $ lieu <> "No Ngrams for this tab"
765 -- TODO: This `panic` would disapear with custom NgramsType.
766
767 ------------------------------------------------------------------------
768 data Repo s p = Repo
769 { _r_version :: Version
770 , _r_state :: s
771 , _r_history :: [p]
772 -- first patch in the list is the most recent
773 }
774 deriving (Generic)
775
776 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
777 parseJSON = genericParseJSON $ unPrefix "_r_"
778
779 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
780 toJSON = genericToJSON $ unPrefix "_r_"
781 toEncoding = genericToEncoding $ unPrefix "_r_"
782
783 instance (Serialise s, Serialise p) => Serialise (Repo s p)
784
785 makeLenses ''Repo
786
787 initRepo :: Monoid s => Repo s p
788 initRepo = Repo 1 mempty []
789
790 type NgramsRepo = Repo NgramsState NgramsStatePatch
791 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
792 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
793
794 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
795 instance Serialise NgramsStatePatch
796
797 initMockRepo :: NgramsRepo
798 initMockRepo = Repo 1 s []
799 where
800 s = Map.singleton TableNgrams.NgramsTerms
801 $ Map.singleton 47254
802 $ Map.fromList
803 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
804
805 data RepoEnv = RepoEnv
806 { _renv_var :: !(MVar NgramsRepo)
807 , _renv_saver :: !(IO ())
808 , _renv_lock :: !FileLock
809 }
810 deriving (Generic)
811
812 makeLenses ''RepoEnv
813
814 class HasRepoVar env where
815 repoVar :: Getter env (MVar NgramsRepo)
816
817 instance HasRepoVar (MVar NgramsRepo) where
818 repoVar = identity
819
820 class HasRepoSaver env where
821 repoSaver :: Getter env (IO ())
822
823 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
824 repoEnv :: Getter env RepoEnv
825
826 instance HasRepo RepoEnv where
827 repoEnv = identity
828
829 instance HasRepoVar RepoEnv where
830 repoVar = renv_var
831
832 instance HasRepoSaver RepoEnv where
833 repoSaver = renv_saver
834
835 type RepoCmdM env err m =
836 ( MonadReader env m
837 , MonadError err m
838 , MonadBaseControl IO m
839 , HasRepo env
840 )
841 ------------------------------------------------------------------------
842
843 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
844 => m ()
845 saveRepo = liftBase =<< view repoSaver
846
847 listTypeConflictResolution :: ListType -> ListType -> ListType
848 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
849
850 ngramsStatePatchConflictResolution
851 :: TableNgrams.NgramsType
852 -> NodeId
853 -> NgramsTerm
854 -> ConflictResolutionNgramsPatch
855 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
856 = (ours, (const ours, ours), (False, False))
857 -- (False, False) mean here that Mod has always priority.
858 -- (True, False) <- would mean priority to the left (same as ours).
859
860 -- undefined {- TODO think this through -}, listTypeConflictResolution)
861
862 -- Current state:
863 -- Insertions are not considered as patches,
864 -- they do not extend history,
865 -- they do not bump version.
866 insertNewOnly :: a -> Maybe b -> a
867 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
868 -- TODO error handling
869
870 something :: Monoid a => Maybe a -> a
871 something Nothing = mempty
872 something (Just a) = a
873
874 {- unused
875 -- TODO refactor with putListNgrams
876 copyListNgrams :: RepoCmdM env err m
877 => NodeId -> NodeId -> NgramsType
878 -> m ()
879 copyListNgrams srcListId dstListId ngramsType = do
880 var <- view repoVar
881 liftBase $ modifyMVar_ var $
882 pure . (r_state . at ngramsType %~ (Just . f . something))
883 saveRepo
884 where
885 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
886 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
887
888 -- TODO refactor with putListNgrams
889 -- The list must be non-empty!
890 -- The added ngrams must be non-existent!
891 addListNgrams :: RepoCmdM env err m
892 => NodeId -> NgramsType
893 -> [NgramsElement] -> m ()
894 addListNgrams listId ngramsType nes = do
895 var <- view repoVar
896 liftBase $ modifyMVar_ var $
897 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
898 saveRepo
899 where
900 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
901 -}
902
903 -- UNSAFE
904 rmListNgrams :: RepoCmdM env err m
905 => ListId
906 -> TableNgrams.NgramsType
907 -> m ()
908 rmListNgrams l nt = setListNgrams l nt mempty
909
910 -- | TODO: incr the Version number
911 -- && should use patch
912 -- UNSAFE
913 setListNgrams :: RepoCmdM env err m
914 => NodeId
915 -> TableNgrams.NgramsType
916 -> Map NgramsTerm NgramsRepoElement
917 -> m ()
918 setListNgrams listId ngramsType ns = do
919 var <- view repoVar
920 liftBase $ modifyMVar_ var $
921 pure . ( r_state
922 . at ngramsType %~
923 (Just .
924 (at listId .~ ( Just ns))
925 . something
926 )
927 )
928 saveRepo
929
930 -- NOTE
931 -- This is no longer part of the API.
932 -- This function is maintained for its usage in Database.Action.Flow.List.
933 -- If the given list of ngrams elements contains ngrams already in
934 -- the repo, they will be ignored.
935 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
936 => NodeId
937 -> TableNgrams.NgramsType
938 -> [NgramsElement] -> m ()
939 putListNgrams _ _ [] = pure ()
940 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
941 where
942 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
943
944 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
945 => NodeId
946 -> TableNgrams.NgramsType
947 -> Map NgramsTerm NgramsRepoElement
948 -> m ()
949 putListNgrams' nodeId ngramsType ns = do
950 -- printDebug "[putLictNgrams'] nodeId" nodeId
951 -- printDebug "[putLictNgrams'] ngramsType" ngramsType
952 -- printDebug "[putListNgrams'] ns" ns
953
954 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
955 (p0, p0_validity) = PM.singleton nodeId p1
956 (p, p_validity) = PM.singleton ngramsType p0
957 assertValid p0_validity
958 assertValid p_validity
959 {-
960 -- TODO
961 v <- currentVersion
962 q <- commitStatePatch (Versioned v p)
963 assert empty q
964 -- What if another commit comes in between?
965 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
966 -- The modifyMVar_ would test the patch with applicable first.
967 -- If valid the rest would be atomic and no merge is required.
968 -}
969 var <- view repoVar
970 liftBase $ modifyMVar_ var $ \r -> do
971 pure $ r & r_version +~ 1
972 & r_history %~ (p :)
973 & r_state . at ngramsType %~
974 (Just .
975 (at nodeId %~
976 ( Just
977 . (<> ns)
978 . something
979 )
980 )
981 . something
982 )
983 saveRepo
984
985
986 currentVersion :: RepoCmdM env err m
987 => m Version
988 currentVersion = do
989 var <- view repoVar
990 r <- liftBase $ readMVar var
991 pure $ r ^. r_version
992
993
994 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
995 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
996 commitStatePatch (Versioned p_version p) = do
997 var <- view repoVar
998 vq' <- liftBase $ modifyMVar var $ \r -> do
999 let
1000 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1001 (p', q') = transformWith ngramsStatePatchConflictResolution p q
1002 r' = r & r_version +~ 1
1003 & r_state %~ act p'
1004 & r_history %~ (p' :)
1005 {-
1006 -- Ideally we would like to check these properties. However:
1007 -- * They should be checked only to debug the code. The client data
1008 -- should be able to trigger these.
1009 -- * What kind of error should they throw (we are in IO here)?
1010 -- * Should we keep modifyMVar?
1011 -- * Should we throw the validation in an Exception, catch it around
1012 -- modifyMVar and throw it back as an Error?
1013 assertValid $ transformable p q
1014 assertValid $ applicable p' (r ^. r_state)
1015 -}
1016 pure (r', Versioned (r' ^. r_version) q')
1017
1018 saveRepo
1019 pure vq'
1020
1021 -- This is a special case of tableNgramsPut where the input patch is empty.
1022 tableNgramsPull :: RepoCmdM env err m
1023 => ListId
1024 -> TableNgrams.NgramsType
1025 -> Version
1026 -> m (Versioned NgramsTablePatch)
1027 tableNgramsPull listId ngramsType p_version = do
1028 var <- view repoVar
1029 r <- liftBase $ readMVar var
1030
1031 let
1032 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1033 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
1034
1035 pure (Versioned (r ^. r_version) q_table)
1036
1037 -- Apply the given patch to the DB and returns the patch to be applied on the
1038 -- client.
1039 -- TODO-ACCESS check
1040 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1041 => TabType -> ListId
1042 -> Versioned NgramsTablePatch
1043 -> m (Versioned NgramsTablePatch)
1044 tableNgramsPut tabType listId (Versioned p_version p_table)
1045 | p_table == mempty = do
1046 let ngramsType = ngramsTypeFromTabType tabType
1047 tableNgramsPull listId ngramsType p_version
1048
1049 | otherwise = do
1050 let ngramsType = ngramsTypeFromTabType tabType
1051 (p0, p0_validity) = PM.singleton listId p_table
1052 (p, p_validity) = PM.singleton ngramsType p0
1053
1054 assertValid p0_validity
1055 assertValid p_validity
1056
1057 commitStatePatch (Versioned p_version p)
1058 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
1059
1060 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1061 mergeNgramsElement _neOld neNew = neNew
1062 {-
1063 { _ne_list :: ListType
1064 If we merge the parents/children we can potentially create cycles!
1065 , _ne_parent :: Maybe NgramsTerm
1066 , _ne_children :: MSet NgramsTerm
1067 }
1068 -}
1069
1070 getNgramsTableMap :: RepoCmdM env err m
1071 => NodeId
1072 -> TableNgrams.NgramsType
1073 -> m (Versioned NgramsTableMap)
1074 getNgramsTableMap nodeId ngramsType = do
1075 v <- view repoVar
1076 repo <- liftBase $ readMVar v
1077 pure $ Versioned (repo ^. r_version)
1078 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1079
1080 dumpJsonTableMap :: RepoCmdM env err m
1081 => Text
1082 -> NodeId
1083 -> TableNgrams.NgramsType
1084 -> m ()
1085 dumpJsonTableMap fpath nodeId ngramsType = do
1086 m <- getNgramsTableMap nodeId ngramsType
1087 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
1088 pure ()
1089
1090 type MinSize = Int
1091 type MaxSize = Int
1092
1093 -- | TODO Errors management
1094 -- TODO: polymorphic for Annuaire or Corpus or ...
1095 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1096 -- TODO: should take only one ListId
1097
1098 getTime' :: MonadBase IO m => m TimeSpec
1099 getTime' = liftBase $ getTime ProcessCPUTime
1100
1101
1102 getTableNgrams :: forall env err m.
1103 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1104 => NodeType -> NodeId -> TabType
1105 -> ListId -> Limit -> Maybe Offset
1106 -> Maybe ListType
1107 -> Maybe MinSize -> Maybe MaxSize
1108 -> Maybe OrderBy
1109 -> (NgramsTerm -> Bool)
1110 -> m (Versioned NgramsTable)
1111 getTableNgrams _nType nId tabType listId limit_ offset
1112 listType minSize maxSize orderBy searchQuery = do
1113
1114 t0 <- getTime'
1115 -- lIds <- selectNodesWithUsername NodeList userMaster
1116 let
1117 ngramsType = ngramsTypeFromTabType tabType
1118 offset' = maybe 0 identity offset
1119 listType' = maybe (const True) (==) listType
1120 minSize' = maybe (const True) (<=) minSize
1121 maxSize' = maybe (const True) (>=) maxSize
1122
1123 selected_node n = minSize' s
1124 && maxSize' s
1125 && searchQuery (n ^. ne_ngrams)
1126 && listType' (n ^. ne_list)
1127 where
1128 s = n ^. ne_size
1129
1130 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1131
1132 ---------------------------------------
1133 sortOnOrder Nothing = identity
1134 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1135 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1136 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1137 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1138
1139 ---------------------------------------
1140 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1141 selectAndPaginate tableMap = roots <> inners
1142 where
1143 list = tableMap ^.. each
1144 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1145 (ne ^. ne_root)
1146 selected_nodes = list & take limit_
1147 . drop offset'
1148 . filter selected_node
1149 . sortOnOrder orderBy
1150 roots = rootOf <$> selected_nodes
1151 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1152 inners = list & filter (selected_inner rootsSet)
1153
1154 ---------------------------------------
1155 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1156 setScores False table = pure table
1157 setScores True table = do
1158 let ngrams_terms = (table ^.. each . ne_ngrams)
1159 t1 <- getTime'
1160 occurrences <- getOccByNgramsOnlyFast' nId
1161 listId
1162 ngramsType
1163 ngrams_terms
1164 t2 <- getTime'
1165 liftBase $ hprint stderr
1166 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1167 (length ngrams_terms) t1 t2
1168 {-
1169 occurrences <- getOccByNgramsOnlySlow nType nId
1170 (lIds <> [listId])
1171 ngramsType
1172 ngrams_terms
1173 -}
1174 let
1175 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1176
1177 pure $ table & each %~ setOcc
1178 ---------------------------------------
1179
1180 -- lists <- catMaybes <$> listsWith userMaster
1181 -- trace (show lists) $
1182 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1183
1184 let scoresNeeded = needsScores orderBy
1185 tableMap1 <- getNgramsTableMap listId ngramsType
1186 t1 <- getTime'
1187 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1188 . Map.mapWithKey ngramsElementFromRepo
1189 t2 <- getTime'
1190 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1191 . setScores (not scoresNeeded)
1192 . selectAndPaginate
1193 t3 <- getTime'
1194 liftBase $ hprint stderr
1195 ("getTableNgrams total=" % timeSpecs
1196 % " map1=" % timeSpecs
1197 % " map2=" % timeSpecs
1198 % " map3=" % timeSpecs
1199 % " sql=" % (if scoresNeeded then "map2" else "map3")
1200 % "\n"
1201 ) t0 t3 t0 t1 t1 t2 t2 t3
1202 pure tableMap3
1203
1204
1205 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1206 scoresRecomputeTableNgrams nId tabType listId = do
1207 tableMap <- getNgramsTableMap listId ngramsType
1208 _ <- tableMap & v_data %%~ setScores
1209 . Map.mapWithKey ngramsElementFromRepo
1210
1211 pure $ 1
1212 where
1213 ngramsType = ngramsTypeFromTabType tabType
1214
1215 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1216 setScores table = do
1217 let ngrams_terms = (table ^.. each . ne_ngrams)
1218 occurrences <- getOccByNgramsOnlyFast' nId
1219 listId
1220 ngramsType
1221 ngrams_terms
1222 let
1223 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1224
1225 pure $ table & each %~ setOcc
1226
1227
1228
1229 -- APIs
1230
1231 -- TODO: find a better place for the code above, All APIs stay here
1232 type QueryParamR = QueryParam' '[Required, Strict]
1233
1234 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1235 deriving (Generic, Enum, Bounded, Read, Show)
1236
1237 instance FromHttpApiData OrderBy
1238 where
1239 parseUrlPiece "TermAsc" = pure TermAsc
1240 parseUrlPiece "TermDesc" = pure TermDesc
1241 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1242 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1243 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1244
1245
1246 instance ToParamSchema OrderBy
1247 instance FromJSON OrderBy
1248 instance ToJSON OrderBy
1249 instance ToSchema OrderBy
1250 instance Arbitrary OrderBy
1251 where
1252 arbitrary = elements [minBound..maxBound]
1253
1254 needsScores :: Maybe OrderBy -> Bool
1255 needsScores (Just ScoreAsc) = True
1256 needsScores (Just ScoreDesc) = True
1257 needsScores _ = False
1258
1259 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1260 :> QueryParamR "ngramsType" TabType
1261 :> QueryParamR "list" ListId
1262 :> QueryParamR "limit" Limit
1263 :> QueryParam "offset" Offset
1264 :> QueryParam "listType" ListType
1265 :> QueryParam "minTermSize" MinSize
1266 :> QueryParam "maxTermSize" MaxSize
1267 :> QueryParam "orderBy" OrderBy
1268 :> QueryParam "search" Text
1269 :> Get '[JSON] (Versioned NgramsTable)
1270
1271 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1272 :> QueryParamR "ngramsType" TabType
1273 :> QueryParamR "list" ListId
1274 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1275 :> Put '[JSON] (Versioned NgramsTablePatch)
1276
1277 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1278 :> QueryParamR "ngramsType" TabType
1279 :> QueryParamR "list" ListId
1280 :> "recompute" :> Post '[JSON] Int
1281
1282 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1283 :> QueryParamR "ngramsType" TabType
1284 :> QueryParamR "list" ListId
1285 :> Get '[JSON] Version
1286
1287 type TableNgramsApi = TableNgramsApiGet
1288 :<|> TableNgramsApiPut
1289 :<|> RecomputeScoresNgramsApiGet
1290 :<|> "version" :> TableNgramsApiGetVersion
1291
1292 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1293 => NodeId
1294 -> TabType
1295 -> ListId
1296 -> Limit
1297 -> Maybe Offset
1298 -> Maybe ListType
1299 -> Maybe MinSize -> Maybe MaxSize
1300 -> Maybe OrderBy
1301 -> Maybe Text -- full text search
1302 -> m (Versioned NgramsTable)
1303 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1304 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1305 where
1306 searchQuery = maybe (const True) isInfixOf mt
1307
1308 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1309 => NodeId
1310 -> TabType
1311 -> ListId
1312 -> m Version
1313 getTableNgramsVersion _nId _tabType _listId = currentVersion
1314 -- TODO: limit?
1315 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1316 -- This line above looks like a waste of computation to finally get only the version.
1317 -- See the comment about listNgramsChangedSince.
1318
1319
1320 -- | Text search is deactivated for now for ngrams by doc only
1321 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1322 => DocId -> TabType
1323 -> ListId -> Limit -> Maybe Offset
1324 -> Maybe ListType
1325 -> Maybe MinSize -> Maybe MaxSize
1326 -> Maybe OrderBy
1327 -> Maybe Text -- full text search
1328 -> m (Versioned NgramsTable)
1329 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1330 ns <- selectNodesWithUsername NodeList userMaster
1331 let ngramsType = ngramsTypeFromTabType tabType
1332 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1333 let searchQuery = flip S.member (S.fromList ngs)
1334 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1335
1336
1337
1338 apiNgramsTableCorpus :: ( RepoCmdM env err m
1339 , HasNodeError err
1340 , HasInvalidError err
1341 , HasConnectionPool env
1342 , HasConfig env
1343 )
1344 => NodeId -> ServerT TableNgramsApi m
1345 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1346 :<|> tableNgramsPut
1347 :<|> scoresRecomputeTableNgrams cId
1348 :<|> getTableNgramsVersion cId
1349
1350 apiNgramsTableDoc :: ( RepoCmdM env err m
1351 , HasNodeError err
1352 , HasInvalidError err
1353 , HasConnectionPool env
1354 , HasConfig env
1355 )
1356 => DocId -> ServerT TableNgramsApi m
1357 apiNgramsTableDoc dId = getTableNgramsDoc dId
1358 :<|> tableNgramsPut
1359 :<|> scoresRecomputeTableNgrams dId
1360 :<|> getTableNgramsVersion dId
1361 -- > index all the corpus accordingly (TODO AD)
1362
1363 -- Did the given list of ngrams changed since the given version?
1364 -- The returned value is versioned boolean value, meaning that one always retrieve the
1365 -- latest version.
1366 -- If the given version is negative then one simply receive the latest version and True.
1367 -- Using this function is more precise than simply comparing the latest version number
1368 -- with the local version number. Indeed there might be no change to this particular list
1369 -- and still the version number has changed because of other lists.
1370 --
1371 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
1372 -- * currentVersion: good computation, good bandwidth, bad precision.
1373 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
1374 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
1375 listNgramsChangedSince :: RepoCmdM env err m
1376 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1377 listNgramsChangedSince listId ngramsType version
1378 | version < 0 =
1379 Versioned <$> currentVersion <*> pure True
1380 | otherwise =
1381 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1382
1383 -- Instances
1384 instance Arbitrary NgramsRepoElement where
1385 arbitrary = elements $ map ngramsElementToRepo ns
1386 where
1387 NgramsTable ns = mockTable
1388
1389 --{-
1390 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1391 where
1392 parseUrlPiece x = maybeToEither x (decode $ cs x)