]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[ngrams] NgramsTerm as a newtype
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
25
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , putListNgrams
35 --, putListNgrams'
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsStatePatch
40 , NgramsTablePatch
41 , NgramsTableMap
42
43 , NgramsTerm(..)
44
45 , NgramsElement(..)
46 , mkNgramsElement
47 , mergeNgramsElement
48
49 , RootParent(..)
50
51 , MSet
52 , mSetFromList
53 , mSetToList
54
55 , Repo(..)
56 , r_version
57 , r_state
58 , r_history
59 , NgramsRepo
60 , NgramsRepoElement(..)
61 , saveRepo
62 , initRepo
63
64 , RepoEnv(..)
65 , renv_var
66 , renv_lock
67
68 , TabType(..)
69 , ngramsTypeFromTabType
70
71 , HasRepoVar(..)
72 , HasRepoSaver(..)
73 , HasRepo(..)
74 , RepoCmdM
75 , QueryParamR
76 , TODO
77
78 -- Internals
79 , getNgramsTableMap
80 , dumpJsonTableMap
81 , tableNgramsPull
82 , tableNgramsPut
83
84 , Version
85 , Versioned(..)
86 , currentVersion
87 , listNgramsChangedSince
88 )
89 where
90
91 import Codec.Serialise (Serialise())
92 import Control.Category ((>>>))
93 import Control.Concurrent
94 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
95 import Control.Monad.Base (MonadBase, liftBase)
96 import Control.Monad.Error.Class (MonadError)
97 import Control.Monad.Reader
98 import Control.Monad.State
99 import Control.Monad.Trans.Control (MonadBaseControl)
100 import Data.Aeson hiding ((.=))
101 import Data.Aeson.TH (deriveJSON)
102 import qualified Data.Aeson.Text as DAT
103 import Data.Either (Either(..))
104 import Data.Foldable
105 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
106 import qualified Data.List as List
107 import Data.Map.Strict (Map)
108 import qualified Data.Map.Strict as Map
109 import qualified Data.Map.Strict.Patch as PM
110 import Data.Maybe (fromMaybe)
111 import Data.Monoid
112 import Data.Ord (Down(..))
113 import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..),
114 PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours,
115 MaybePatch(Mod), unMod, old, new)
116 import Data.Set (Set)
117 import qualified Data.Set as S
118 import qualified Data.Set as Set
119 import Data.String (IsString, fromString)
120 import Data.Swagger hiding (version, patch)
121 import Data.Text (Text, count, isInfixOf, pack, strip, unpack)
122 import Data.Text.Lazy.IO as DTL
123 import Data.Validity
124 import Database.PostgreSQL.Simple.FromField (FromField, fromField, ResultError(ConversionFailed), returnError)
125 import Formatting (hprint, int, (%))
126 import Formatting.Clock (timeSpecs)
127 import GHC.Generics (Generic)
128 import Servant hiding (Patch)
129 import System.Clock (getTime, TimeSpec, Clock(..))
130 import System.FileLock (FileLock)
131 import System.IO (stderr)
132 import Test.QuickCheck (elements, frequency)
133 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
134
135 import Prelude (error)
136 import Protolude (maybeToEither)
137 import Gargantext.Prelude
138
139 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
140 import Gargantext.Core.Types (TODO)
141 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
142 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
143 import Gargantext.Database.Query.Table.Node.Select
144 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
145 import Gargantext.Database.Admin.Config (userMaster)
146 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
147 import Gargantext.Database.Admin.Types.Node (NodeType(..))
148 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
149 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
150
151 ------------------------------------------------------------------------
152 --data FacetFormat = Table | Chart
153 data TabType = Docs | Trash | MoreFav | MoreTrash
154 | Terms | Sources | Authors | Institutes
155 | Contacts
156 deriving (Generic, Enum, Bounded, Show)
157
158 instance FromHttpApiData TabType
159 where
160 parseUrlPiece "Docs" = pure Docs
161 parseUrlPiece "Trash" = pure Trash
162 parseUrlPiece "MoreFav" = pure MoreFav
163 parseUrlPiece "MoreTrash" = pure MoreTrash
164
165 parseUrlPiece "Terms" = pure Terms
166 parseUrlPiece "Sources" = pure Sources
167 parseUrlPiece "Institutes" = pure Institutes
168 parseUrlPiece "Authors" = pure Authors
169
170 parseUrlPiece "Contacts" = pure Contacts
171
172 parseUrlPiece _ = Left "Unexpected value of TabType"
173
174 instance ToParamSchema TabType
175 instance ToJSON TabType
176 instance FromJSON TabType
177 instance ToSchema TabType
178 instance Arbitrary TabType
179 where
180 arbitrary = elements [minBound .. maxBound]
181
182 newtype MSet a = MSet (Map a ())
183 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
184
185 instance ToJSON a => ToJSON (MSet a) where
186 toJSON (MSet m) = toJSON (Map.keys m)
187 toEncoding (MSet m) = toEncoding (Map.keys m)
188
189 mSetFromSet :: Set a -> MSet a
190 mSetFromSet = MSet . Map.fromSet (const ())
191
192 mSetFromList :: Ord a => [a] -> MSet a
193 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
194
195 -- mSetToSet :: Ord a => MSet a -> Set a
196 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
197 mSetToSet :: Ord a => MSet a -> Set a
198 mSetToSet = Set.fromList . mSetToList
199
200 mSetToList :: MSet a -> [a]
201 mSetToList (MSet a) = Map.keys a
202
203 instance Foldable MSet where
204 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
205
206 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
207 parseJSON = fmap mSetFromList . parseJSON
208
209 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
210 -- TODO
211 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
212
213 ------------------------------------------------------------------------
214 newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
215 deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema)
216
217 instance FromJSONKey NgramsTerm where
218 fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
219
220 instance IsString NgramsTerm where
221 fromString s = NgramsTerm $ pack s
222
223 instance FromField NgramsTerm
224 where
225 fromField field mb = do
226 v <- fromField field mb
227 case fromJSON v of
228 Success a -> pure $ NgramsTerm $ strip a
229 Error _err -> returnError ConversionFailed field
230 $ List.intercalate " " [ "cannot parse hyperdata for JSON: "
231 , show v
232 ]
233
234 data RootParent = RootParent
235 { _rp_root :: NgramsTerm
236 , _rp_parent :: NgramsTerm
237 }
238 deriving (Ord, Eq, Show, Generic)
239
240 deriveJSON (unPrefix "_rp_") ''RootParent
241 makeLenses ''RootParent
242
243 data NgramsRepoElement = NgramsRepoElement
244 { _nre_size :: Int
245 , _nre_list :: ListType
246 --, _nre_root_parent :: Maybe RootParent
247 , _nre_root :: Maybe NgramsTerm
248 , _nre_parent :: Maybe NgramsTerm
249 , _nre_children :: MSet NgramsTerm
250 }
251 deriving (Ord, Eq, Show, Generic)
252
253 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
254 -- TODO
255 -- if ngrams & not size => size
256 -- drop occurrences
257
258 makeLenses ''NgramsRepoElement
259
260 instance ToSchema NgramsRepoElement where
261 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
262
263 instance Serialise (MSet NgramsTerm)
264 instance Serialise NgramsRepoElement
265
266 data NgramsElement =
267 NgramsElement { _ne_ngrams :: NgramsTerm
268 , _ne_size :: Int
269 , _ne_list :: ListType
270 , _ne_occurrences :: Int
271 , _ne_root :: Maybe NgramsTerm
272 , _ne_parent :: Maybe NgramsTerm
273 , _ne_children :: MSet NgramsTerm
274 }
275 deriving (Ord, Eq, Show, Generic)
276
277 deriveJSON (unPrefix "_ne_") ''NgramsElement
278 makeLenses ''NgramsElement
279
280 mkNgramsElement :: NgramsTerm
281 -> ListType
282 -> Maybe RootParent
283 -> MSet NgramsTerm
284 -> NgramsElement
285 mkNgramsElement ngrams list rp children =
286 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
287 where
288 -- TODO review
289 size = 1 + (count " " $ unNgramsTerm ngrams)
290
291 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
292 newNgramsElement mayList ngrams =
293 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
294
295 instance ToSchema NgramsElement where
296 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
297 instance Arbitrary NgramsElement where
298 arbitrary = elements [newNgramsElement Nothing "sport"]
299
300 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
301 ngramsElementToRepo
302 (NgramsElement { _ne_size = s
303 , _ne_list = l
304 , _ne_root = r
305 , _ne_parent = p
306 , _ne_children = c
307 }) =
308 NgramsRepoElement
309 { _nre_size = s
310 , _nre_list = l
311 , _nre_parent = p
312 , _nre_root = r
313 , _nre_children = c
314 }
315
316 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
317 ngramsElementFromRepo
318 ngrams
319 (NgramsRepoElement
320 { _nre_size = s
321 , _nre_list = l
322 , _nre_parent = p
323 , _nre_root = r
324 , _nre_children = c
325 }) =
326 NgramsElement { _ne_size = s
327 , _ne_list = l
328 , _ne_root = r
329 , _ne_parent = p
330 , _ne_children = c
331 , _ne_ngrams = ngrams
332 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
333 {-
334 -- Here we could use 0 if we want to avoid any `panic`.
335 -- It will not happen using getTableNgrams if
336 -- getOccByNgramsOnly provides a count of occurrences for
337 -- all the ngrams given.
338 -}
339 }
340
341 ------------------------------------------------------------------------
342 newtype NgramsTable = NgramsTable [NgramsElement]
343 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
344
345 type NgramsList = NgramsTable
346
347 makePrisms ''NgramsTable
348
349 -- | Question: why these repetition of Type in this instance
350 -- may you document it please ?
351 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
352 each = _NgramsTable . each
353
354 -- TODO discuss
355 -- | TODO Check N and Weight
356 {-
357 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
358 toNgramsElement ns = map toNgramsElement' ns
359 where
360 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
361 where
362 p' = case p of
363 Nothing -> Nothing
364 Just x -> lookup x mapParent
365 c' = maybe mempty identity $ lookup t mapChildren
366 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
367
368 mapParent :: Map Int Text
369 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
370
371 mapChildren :: Map Text (Set Text)
372 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
373 $ Map.fromListWith (<>)
374 $ map (first fromJust)
375 $ filter (isJust . fst)
376 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
377 -}
378
379 mockTable :: NgramsTable
380 mockTable = NgramsTable
381 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
382 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
383 , mkNgramsElement "cats" StopTerm Nothing mempty
384 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
385 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
386 , mkNgramsElement "fox" MapTerm Nothing mempty
387 , mkNgramsElement "object" CandidateTerm Nothing mempty
388 , mkNgramsElement "nothing" StopTerm Nothing mempty
389 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
390 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
391 , mkNgramsElement "moon" CandidateTerm Nothing mempty
392 , mkNgramsElement "sky" StopTerm Nothing mempty
393 ]
394 where
395 rp n = Just $ RootParent n n
396
397 instance Arbitrary NgramsTable where
398 arbitrary = pure mockTable
399
400 instance ToSchema NgramsTable
401
402 ------------------------------------------------------------------------
403 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
404 ------------------------------------------------------------------------
405 -- On the Client side:
406 --data Action = InGroup NgramsId NgramsId
407 -- | OutGroup NgramsId NgramsId
408 -- | SetListType NgramsId ListType
409
410 data PatchSet a = PatchSet
411 { _rem :: Set a
412 , _add :: Set a
413 }
414 deriving (Eq, Ord, Show, Generic)
415
416 makeLenses ''PatchSet
417 makePrisms ''PatchSet
418
419 instance ToJSON a => ToJSON (PatchSet a) where
420 toJSON = genericToJSON $ unPrefix "_"
421 toEncoding = genericToEncoding $ unPrefix "_"
422
423 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
424 parseJSON = genericParseJSON $ unPrefix "_"
425
426 {-
427 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
428 arbitrary = PatchSet <$> arbitrary <*> arbitrary
429
430 type instance Patched (PatchSet a) = Set a
431
432 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
433 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
434
435 instance Ord a => Semigroup (PatchSet a) where
436 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
437 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
438 } -- TODO Review
439
440 instance Ord a => Monoid (PatchSet a) where
441 mempty = PatchSet mempty mempty
442
443 instance Ord a => Group (PatchSet a) where
444 invert (PatchSet r a) = PatchSet a r
445
446 instance Ord a => Composable (PatchSet a) where
447 composable _ _ = undefined
448
449 instance Ord a => Action (PatchSet a) (Set a) where
450 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
451
452 instance Applicable (PatchSet a) (Set a) where
453 applicable _ _ = mempty
454
455 instance Ord a => Validity (PatchSet a) where
456 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
457
458 instance Ord a => Transformable (PatchSet a) where
459 transformable = undefined
460
461 conflicts _p _q = undefined
462
463 transformWith conflict p q = undefined conflict p q
464
465 instance ToSchema a => ToSchema (PatchSet a)
466 -}
467
468 type AddRem = Replace (Maybe ())
469
470 instance Serialise AddRem
471
472 remPatch, addPatch :: AddRem
473 remPatch = replace (Just ()) Nothing
474 addPatch = replace Nothing (Just ())
475
476 isRem :: Replace (Maybe ()) -> Bool
477 isRem = (== remPatch)
478
479 type PatchMap = PM.PatchMap
480
481
482 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
483 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
484 Transformable, Composable)
485
486 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
487 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
488
489 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
490 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
491
492 -- TODO this breaks module abstraction
493 makePrisms ''PM.PatchMap
494
495 makePrisms ''PatchMSet
496
497 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
498 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
499 where
500 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
501 f = Map.partition isRem >>> both %~ Map.keysSet
502
503 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
504 g (rems, adds) = Map.fromSet (const remPatch) rems
505 <> Map.fromSet (const addPatch) adds
506
507 instance Ord a => Action (PatchMSet a) (MSet a) where
508 act (PatchMSet p) (MSet m) = MSet $ act p m
509
510 instance Ord a => Applicable (PatchMSet a) (MSet a) where
511 applicable (PatchMSet p) (MSet m) = applicable p m
512
513 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
514 toJSON = toJSON . view _PatchMSetIso
515 toEncoding = toEncoding . view _PatchMSetIso
516
517 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
518 parseJSON = fmap (_PatchMSetIso #) . parseJSON
519
520 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
521 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
522
523 instance ToSchema a => ToSchema (PatchMSet a) where
524 -- TODO
525 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
526
527 type instance Patched (PatchMSet a) = MSet a
528
529 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
530 arbitrary = uncurry replace <$> arbitrary
531 -- If they happen to be equal then the patch is Keep.
532
533 instance ToSchema a => ToSchema (Replace a) where
534 declareNamedSchema (_ :: Proxy (Replace a)) = do
535 -- TODO Keep constructor is not supported here.
536 aSchema <- declareSchemaRef (Proxy :: Proxy a)
537 return $ NamedSchema (Just "Replace") $ mempty
538 & type_ ?~ SwaggerObject
539 & properties .~
540 InsOrdHashMap.fromList
541 [ ("old", aSchema)
542 , ("new", aSchema)
543 ]
544 & required .~ [ "old", "new" ]
545
546 data NgramsPatch
547 = NgramsPatch { _patch_children :: PatchMSet NgramsTerm
548 , _patch_list :: Replace ListType -- TODO Map UserId ListType
549 }
550 | NgramsReplace { _patch_old :: Maybe NgramsRepoElement
551 , _patch_new :: Maybe NgramsRepoElement
552 }
553 deriving (Eq, Show, Generic)
554
555 -- The JSON encoding is untagged, this is OK since the field names are disjoints and thus the encoding is unambiguous.
556 -- TODO: the empty object should be accepted and treated as mempty.
557 deriveJSON (unPrefixUntagged "_") ''NgramsPatch
558 makeLenses ''NgramsPatch
559
560 -- TODO: This instance is simplified since we should either have the fields children and/or list
561 -- or the fields old and/or new.
562 instance ToSchema NgramsPatch where
563 declareNamedSchema _ = do
564 childrenSch <- declareSchemaRef (Proxy :: Proxy (PatchMSet NgramsTerm))
565 listSch <- declareSchemaRef (Proxy :: Proxy (Replace ListType))
566 nreSch <- declareSchemaRef (Proxy :: Proxy NgramsRepoElement)
567 return $ NamedSchema (Just "NgramsPatch") $ mempty
568 & type_ ?~ SwaggerObject
569 & properties .~
570 InsOrdHashMap.fromList
571 [ ("children", childrenSch)
572 , ("list", listSch)
573 , ("old", nreSch)
574 , ("new", nreSch)
575 ]
576
577 instance Arbitrary NgramsPatch where
578 arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
579 , (1, NgramsReplace <$> arbitrary <*> arbitrary)
580 ]
581
582 instance Serialise NgramsPatch
583 instance Serialise (Replace ListType)
584
585 instance Serialise ListType
586
587 type NgramsPatchIso =
588 MaybePatch NgramsRepoElement (PairPatch (PatchMSet NgramsTerm) (Replace ListType))
589
590 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
591 _NgramsPatch = iso unwrap wrap
592 where
593 unwrap (NgramsPatch c l) = Mod $ PairPatch (c, l)
594 unwrap (NgramsReplace o n) = replace o n
595 wrap x =
596 case unMod x of
597 Just (PairPatch (c, l)) -> NgramsPatch c l
598 Nothing -> NgramsReplace (x ^? old . _Just) (x ^? new . _Just)
599
600 instance Semigroup NgramsPatch where
601 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
602
603 instance Monoid NgramsPatch where
604 mempty = _NgramsPatch # mempty
605
606 instance Validity NgramsPatch where
607 validate p = p ^. _NgramsPatch . to validate
608
609 instance Transformable NgramsPatch where
610 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
611
612 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
613
614 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
615 where
616 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
617
618 type ConflictResolutionNgramsPatch =
619 ( ConflictResolutionReplace (Maybe NgramsRepoElement)
620 , ( ConflictResolutionPatchMSet NgramsTerm
621 , ConflictResolutionReplace ListType
622 )
623 , (Bool, Bool)
624 )
625 type instance ConflictResolution NgramsPatch =
626 ConflictResolutionNgramsPatch
627
628 type PatchedNgramsPatch = Maybe NgramsRepoElement
629 type instance Patched NgramsPatch = PatchedNgramsPatch
630
631 instance Applicable (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
632 applicable (PairPatch (c, l)) n = applicable c (n ^. nre_children) <> applicable l (n ^. nre_list)
633
634 instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
635 act (PairPatch (c, l)) = (nre_children %~ act c)
636 . (nre_list %~ act l)
637
638 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
639 applicable p = applicable (p ^. _NgramsPatch)
640
641 instance Action NgramsPatch (Maybe NgramsRepoElement) where
642 act p = act (p ^. _NgramsPatch)
643
644 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
645 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
646
647 instance Serialise NgramsTablePatch
648 instance Serialise (PatchMap NgramsTerm NgramsPatch)
649
650 instance FromField NgramsTablePatch
651 where
652 fromField = fromField'
653
654 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
655 where
656 fromField = fromField'
657
658 type instance ConflictResolution NgramsTablePatch =
659 NgramsTerm -> ConflictResolutionNgramsPatch
660
661 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
662 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
663 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
664
665 makePrisms ''NgramsTablePatch
666 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
667 instance ToSchema NgramsTablePatch
668
669 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
670 applicable p = applicable (p ^. _NgramsTablePatch)
671
672 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
673 act p =
674 fmap (execState (reParentNgramsTablePatch p)) .
675 act (p ^. _NgramsTablePatch)
676
677 instance Arbitrary NgramsTablePatch where
678 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
679
680 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
681 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
682 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
683
684 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
685
686 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
687 reRootChildren root ngram = do
688 nre <- use $ at ngram
689 forOf_ (_Just . nre_children . folded) nre $ \child -> do
690 at child . _Just . nre_root ?= root
691 reRootChildren root child
692
693 reParent :: Maybe RootParent -> ReParent NgramsTerm
694 reParent rp child = do
695 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
696 . (nre_root .~ (_rp_root <$> rp))
697 )
698 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
699
700 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
701 reParentAddRem rp child p =
702 reParent (if isRem p then Nothing else Just rp) child
703
704 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
705 reParentNgramsPatch parent ngramsPatch = do
706 root_of_parent <- use (at parent . _Just . nre_root)
707 let
708 root = fromMaybe parent root_of_parent
709 rp = RootParent { _rp_root = root, _rp_parent = parent }
710 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
711 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
712
713 reParentNgramsTablePatch :: ReParent NgramsTablePatch
714 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
715 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
716
717 ------------------------------------------------------------------------
718 ------------------------------------------------------------------------
719 type Version = Int
720
721 data Versioned a = Versioned
722 { _v_version :: Version
723 , _v_data :: a
724 }
725 deriving (Generic, Show, Eq)
726 deriveJSON (unPrefix "_v_") ''Versioned
727 makeLenses ''Versioned
728 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
729 declareNamedSchema = wellNamedSchema "_v_"
730 instance Arbitrary a => Arbitrary (Versioned a) where
731 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
732
733
734 {-
735 -- TODO sequences of modifications (Patchs)
736 type NgramsIdPatch = Patch NgramsId NgramsPatch
737
738 ngramsPatch :: Int -> NgramsPatch
739 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
740
741 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
742 toEdit n p = Edit n p
743 ngramsIdPatch :: Patch NgramsId NgramsPatch
744 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
745 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
746 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
747 ]
748
749 -- applyPatchBack :: Patch -> IO Patch
750 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
751 -}
752 ------------------------------------------------------------------------
753 ------------------------------------------------------------------------
754 ------------------------------------------------------------------------
755
756 {-
757 -- TODO: Replace.old is ignored which means that if the current list
758 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
759 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
760 -- However this should not happen in non conflicting situations.
761 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
762 mkListsUpdate nt patches =
763 [ (ngramsTypeId nt, ng, listTypeId lt)
764 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
765 , lt <- patch ^.. patch_list . new
766 ]
767
768 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
769 -> NgramsType
770 -> NgramsTablePatch
771 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
772 mkChildrenGroups addOrRem nt patches =
773 [ (ngramsTypeId nt, parent, child)
774 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
775 , child <- patch ^.. patch_children . to addOrRem . folded
776 ]
777 -}
778
779 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
780 ngramsTypeFromTabType tabType =
781 let lieu = "Garg.API.Ngrams: " :: Text in
782 case tabType of
783 Sources -> TableNgrams.Sources
784 Authors -> TableNgrams.Authors
785 Institutes -> TableNgrams.Institutes
786 Terms -> TableNgrams.NgramsTerms
787 _ -> panic $ lieu <> "No Ngrams for this tab"
788 -- TODO: This `panic` would disapear with custom NgramsType.
789
790 ------------------------------------------------------------------------
791 data Repo s p = Repo
792 { _r_version :: Version
793 , _r_state :: s
794 , _r_history :: [p]
795 -- first patch in the list is the most recent
796 }
797 deriving (Generic)
798
799 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
800 parseJSON = genericParseJSON $ unPrefix "_r_"
801
802 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
803 toJSON = genericToJSON $ unPrefix "_r_"
804 toEncoding = genericToEncoding $ unPrefix "_r_"
805
806 instance (Serialise s, Serialise p) => Serialise (Repo s p)
807
808 makeLenses ''Repo
809
810 initRepo :: Monoid s => Repo s p
811 initRepo = Repo 1 mempty []
812
813 type NgramsRepo = Repo NgramsState NgramsStatePatch
814 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
815 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
816
817 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
818 instance Serialise NgramsStatePatch
819
820 initMockRepo :: NgramsRepo
821 initMockRepo = Repo 1 s []
822 where
823 s = Map.singleton TableNgrams.NgramsTerms
824 $ Map.singleton 47254
825 $ Map.fromList
826 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
827
828 data RepoEnv = RepoEnv
829 { _renv_var :: !(MVar NgramsRepo)
830 , _renv_saver :: !(IO ())
831 , _renv_lock :: !FileLock
832 }
833 deriving (Generic)
834
835 makeLenses ''RepoEnv
836
837 class HasRepoVar env where
838 repoVar :: Getter env (MVar NgramsRepo)
839
840 instance HasRepoVar (MVar NgramsRepo) where
841 repoVar = identity
842
843 class HasRepoSaver env where
844 repoSaver :: Getter env (IO ())
845
846 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
847 repoEnv :: Getter env RepoEnv
848
849 instance HasRepo RepoEnv where
850 repoEnv = identity
851
852 instance HasRepoVar RepoEnv where
853 repoVar = renv_var
854
855 instance HasRepoSaver RepoEnv where
856 repoSaver = renv_saver
857
858 type RepoCmdM env err m =
859 ( MonadReader env m
860 , MonadError err m
861 , MonadBaseControl IO m
862 , HasRepo env
863 )
864 ------------------------------------------------------------------------
865
866 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
867 => m ()
868 saveRepo = liftBase =<< view repoSaver
869
870 listTypeConflictResolution :: ListType -> ListType -> ListType
871 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
872
873 ngramsStatePatchConflictResolution
874 :: TableNgrams.NgramsType
875 -> NodeId
876 -> NgramsTerm
877 -> ConflictResolutionNgramsPatch
878 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
879 = (ours, (const ours, ours), (False, False))
880 -- ^------^------- they mean that Mod has always priority.
881 --(True, False) <- would mean priority to the left (same as ours).
882
883 -- undefined {- TODO think this through -}, listTypeConflictResolution)
884
885 -- Current state:
886 -- Insertions are not considered as patches,
887 -- they do not extend history,
888 -- they do not bump version.
889 insertNewOnly :: a -> Maybe b -> a
890 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
891 -- TODO error handling
892
893 something :: Monoid a => Maybe a -> a
894 something Nothing = mempty
895 something (Just a) = a
896
897 {- unused
898 -- TODO refactor with putListNgrams
899 copyListNgrams :: RepoCmdM env err m
900 => NodeId -> NodeId -> NgramsType
901 -> m ()
902 copyListNgrams srcListId dstListId ngramsType = do
903 var <- view repoVar
904 liftBase $ modifyMVar_ var $
905 pure . (r_state . at ngramsType %~ (Just . f . something))
906 saveRepo
907 where
908 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
909 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
910
911 -- TODO refactor with putListNgrams
912 -- The list must be non-empty!
913 -- The added ngrams must be non-existent!
914 addListNgrams :: RepoCmdM env err m
915 => NodeId -> NgramsType
916 -> [NgramsElement] -> m ()
917 addListNgrams listId ngramsType nes = do
918 var <- view repoVar
919 liftBase $ modifyMVar_ var $
920 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
921 saveRepo
922 where
923 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
924 -}
925
926 -- UNSAFE
927 rmListNgrams :: RepoCmdM env err m
928 => ListId
929 -> TableNgrams.NgramsType
930 -> m ()
931 rmListNgrams l nt = setListNgrams l nt mempty
932
933 -- | TODO: incr the Version number
934 -- && should use patch
935 -- UNSAFE
936 setListNgrams :: RepoCmdM env err m
937 => NodeId
938 -> TableNgrams.NgramsType
939 -> Map NgramsTerm NgramsRepoElement
940 -> m ()
941 setListNgrams listId ngramsType ns = do
942 var <- view repoVar
943 liftBase $ modifyMVar_ var $
944 pure . ( r_state
945 . at ngramsType %~
946 (Just .
947 (at listId .~ ( Just ns))
948 . something
949 )
950 )
951 saveRepo
952
953 -- NOTE
954 -- This is no longer part of the API.
955 -- This function is maintained for its usage in Database.Action.Flow.List.
956 -- If the given list of ngrams elements contains ngrams already in
957 -- the repo, they will be ignored.
958 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
959 => NodeId
960 -> TableNgrams.NgramsType
961 -> [NgramsElement]
962 -> m ()
963 putListNgrams _ _ [] = pure ()
964 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
965 where
966 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
967
968 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
969 => NodeId
970 -> TableNgrams.NgramsType
971 -> Map NgramsTerm NgramsRepoElement
972 -> m ()
973 putListNgrams' nodeId ngramsType ns = do
974 printDebug "[putListNgrams'] nodeId" nodeId
975 printDebug "[putListNgrams'] ngramsType" ngramsType
976 printDebug "[putListNgrams'] ns" ns
977
978 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
979 (p0, p0_validity) = PM.singleton nodeId p1
980 (p, p_validity) = PM.singleton ngramsType p0
981 assertValid p0_validity
982 assertValid p_validity
983 {-
984 -- TODO
985 v <- currentVersion
986 q <- commitStatePatch (Versioned v p)
987 assert empty q
988 -- What if another commit comes in between?
989 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
990 -- The modifyMVar_ would test the patch with applicable first.
991 -- If valid the rest would be atomic and no merge is required.
992 -}
993 var <- view repoVar
994 liftBase $ modifyMVar_ var $ \r -> do
995 pure $ r & r_version +~ 1
996 & r_history %~ (p :)
997 & r_state . at ngramsType %~
998 (Just .
999 (at nodeId %~
1000 ( Just
1001 . (<> ns)
1002 . something
1003 )
1004 )
1005 . something
1006 )
1007 saveRepo
1008
1009
1010 currentVersion :: RepoCmdM env err m
1011 => m Version
1012 currentVersion = do
1013 var <- view repoVar
1014 r <- liftBase $ readMVar var
1015 pure $ r ^. r_version
1016
1017
1018 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1019 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
1020 commitStatePatch (Versioned p_version p) = do
1021 var <- view repoVar
1022 vq' <- liftBase $ modifyMVar var $ \r -> do
1023 let
1024 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1025 (p', q') = transformWith ngramsStatePatchConflictResolution p q
1026 r' = r & r_version +~ 1
1027 & r_state %~ act p'
1028 & r_history %~ (p' :)
1029 {-
1030 -- Ideally we would like to check these properties. However:
1031 -- * They should be checked only to debug the code. The client data
1032 -- should be able to trigger these.
1033 -- * What kind of error should they throw (we are in IO here)?
1034 -- * Should we keep modifyMVar?
1035 -- * Should we throw the validation in an Exception, catch it around
1036 -- modifyMVar and throw it back as an Error?
1037 assertValid $ transformable p q
1038 assertValid $ applicable p' (r ^. r_state)
1039 -}
1040 pure (r', Versioned (r' ^. r_version) q')
1041
1042 saveRepo
1043 pure vq'
1044
1045 -- This is a special case of tableNgramsPut where the input patch is empty.
1046 tableNgramsPull :: RepoCmdM env err m
1047 => ListId
1048 -> TableNgrams.NgramsType
1049 -> Version
1050 -> m (Versioned NgramsTablePatch)
1051 tableNgramsPull listId ngramsType p_version = do
1052 var <- view repoVar
1053 r <- liftBase $ readMVar var
1054
1055 let
1056 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1057 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
1058
1059 pure (Versioned (r ^. r_version) q_table)
1060
1061 -- Apply the given patch to the DB and returns the patch to be applied on the
1062 -- client.
1063 -- TODO-ACCESS check
1064 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1065 => TabType
1066 -> ListId
1067 -> Versioned NgramsTablePatch
1068 -> m (Versioned NgramsTablePatch)
1069 tableNgramsPut tabType listId (Versioned p_version p_table)
1070 | p_table == mempty = do
1071 let ngramsType = ngramsTypeFromTabType tabType
1072 tableNgramsPull listId ngramsType p_version
1073
1074 | otherwise = do
1075 let ngramsType = ngramsTypeFromTabType tabType
1076 (p0, p0_validity) = PM.singleton listId p_table
1077 (p, p_validity) = PM.singleton ngramsType p0
1078
1079 assertValid p0_validity
1080 assertValid p_validity
1081
1082 commitStatePatch (Versioned p_version p)
1083 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
1084
1085 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1086 mergeNgramsElement _neOld neNew = neNew
1087 {-
1088 { _ne_list :: ListType
1089 If we merge the parents/children we can potentially create cycles!
1090 , _ne_parent :: Maybe NgramsTerm
1091 , _ne_children :: MSet NgramsTerm
1092 }
1093 -}
1094
1095 getNgramsTableMap :: RepoCmdM env err m
1096 => NodeId
1097 -> TableNgrams.NgramsType
1098 -> m (Versioned NgramsTableMap)
1099 getNgramsTableMap nodeId ngramsType = do
1100 v <- view repoVar
1101 repo <- liftBase $ readMVar v
1102 pure $ Versioned (repo ^. r_version)
1103 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1104
1105 dumpJsonTableMap :: RepoCmdM env err m
1106 => Text
1107 -> NodeId
1108 -> TableNgrams.NgramsType
1109 -> m ()
1110 dumpJsonTableMap fpath nodeId ngramsType = do
1111 m <- getNgramsTableMap nodeId ngramsType
1112 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
1113 pure ()
1114
1115 type MinSize = Int
1116 type MaxSize = Int
1117
1118 -- | TODO Errors management
1119 -- TODO: polymorphic for Annuaire or Corpus or ...
1120 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1121 -- TODO: should take only one ListId
1122
1123 getTime' :: MonadBase IO m => m TimeSpec
1124 getTime' = liftBase $ getTime ProcessCPUTime
1125
1126
1127 getTableNgrams :: forall env err m.
1128 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1129 => NodeType -> NodeId -> TabType
1130 -> ListId -> Limit -> Maybe Offset
1131 -> Maybe ListType
1132 -> Maybe MinSize -> Maybe MaxSize
1133 -> Maybe OrderBy
1134 -> (NgramsTerm -> Bool)
1135 -> m (Versioned NgramsTable)
1136 getTableNgrams _nType nId tabType listId limit_ offset
1137 listType minSize maxSize orderBy searchQuery = do
1138
1139 t0 <- getTime'
1140 -- lIds <- selectNodesWithUsername NodeList userMaster
1141 let
1142 ngramsType = ngramsTypeFromTabType tabType
1143 offset' = maybe 0 identity offset
1144 listType' = maybe (const True) (==) listType
1145 minSize' = maybe (const True) (<=) minSize
1146 maxSize' = maybe (const True) (>=) maxSize
1147
1148 selected_node n = minSize' s
1149 && maxSize' s
1150 && searchQuery (n ^. ne_ngrams)
1151 && listType' (n ^. ne_list)
1152 where
1153 s = n ^. ne_size
1154
1155 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1156
1157 ---------------------------------------
1158 sortOnOrder Nothing = identity
1159 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1160 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1161 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1162 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1163
1164 ---------------------------------------
1165 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1166 selectAndPaginate tableMap = roots <> inners
1167 where
1168 list = tableMap ^.. each
1169 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1170 (ne ^. ne_root)
1171 selected_nodes = list & take limit_
1172 . drop offset'
1173 . filter selected_node
1174 . sortOnOrder orderBy
1175 roots = rootOf <$> selected_nodes
1176 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1177 inners = list & filter (selected_inner rootsSet)
1178
1179 ---------------------------------------
1180 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1181 setScores False table = pure table
1182 setScores True table = do
1183 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
1184 t1 <- getTime'
1185 occurrences <- getOccByNgramsOnlyFast' nId
1186 listId
1187 ngramsType
1188 ngrams_terms
1189 t2 <- getTime'
1190 liftBase $ hprint stderr
1191 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1192 (length ngrams_terms) t1 t2
1193 {-
1194 occurrences <- getOccByNgramsOnlySlow nType nId
1195 (lIds <> [listId])
1196 ngramsType
1197 ngrams_terms
1198 -}
1199 let
1200 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
1201
1202 pure $ table & each %~ setOcc
1203 ---------------------------------------
1204
1205 -- lists <- catMaybes <$> listsWith userMaster
1206 -- trace (show lists) $
1207 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1208
1209 let scoresNeeded = needsScores orderBy
1210 tableMap1 <- getNgramsTableMap listId ngramsType
1211 t1 <- getTime'
1212 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1213 . Map.mapWithKey ngramsElementFromRepo
1214 t2 <- getTime'
1215 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1216 . setScores (not scoresNeeded)
1217 . selectAndPaginate
1218 t3 <- getTime'
1219 liftBase $ hprint stderr
1220 ("getTableNgrams total=" % timeSpecs
1221 % " map1=" % timeSpecs
1222 % " map2=" % timeSpecs
1223 % " map3=" % timeSpecs
1224 % " sql=" % (if scoresNeeded then "map2" else "map3")
1225 % "\n"
1226 ) t0 t3 t0 t1 t1 t2 t2 t3
1227 pure tableMap3
1228
1229
1230 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1231 scoresRecomputeTableNgrams nId tabType listId = do
1232 tableMap <- getNgramsTableMap listId ngramsType
1233 _ <- tableMap & v_data %%~ setScores
1234 . Map.mapWithKey ngramsElementFromRepo
1235
1236 pure $ 1
1237 where
1238 ngramsType = ngramsTypeFromTabType tabType
1239
1240 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1241 setScores table = do
1242 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
1243 occurrences <- getOccByNgramsOnlyFast' nId
1244 listId
1245 ngramsType
1246 ngrams_terms
1247 let
1248 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
1249
1250 pure $ table & each %~ setOcc
1251
1252
1253
1254 -- APIs
1255
1256 -- TODO: find a better place for the code above, All APIs stay here
1257 type QueryParamR = QueryParam' '[Required, Strict]
1258
1259 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1260 deriving (Generic, Enum, Bounded, Read, Show)
1261
1262 instance FromHttpApiData OrderBy
1263 where
1264 parseUrlPiece "TermAsc" = pure TermAsc
1265 parseUrlPiece "TermDesc" = pure TermDesc
1266 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1267 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1268 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1269
1270
1271 instance ToParamSchema OrderBy
1272 instance FromJSON OrderBy
1273 instance ToJSON OrderBy
1274 instance ToSchema OrderBy
1275 instance Arbitrary OrderBy
1276 where
1277 arbitrary = elements [minBound..maxBound]
1278
1279 needsScores :: Maybe OrderBy -> Bool
1280 needsScores (Just ScoreAsc) = True
1281 needsScores (Just ScoreDesc) = True
1282 needsScores _ = False
1283
1284 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1285 :> QueryParamR "ngramsType" TabType
1286 :> QueryParamR "list" ListId
1287 :> QueryParamR "limit" Limit
1288 :> QueryParam "offset" Offset
1289 :> QueryParam "listType" ListType
1290 :> QueryParam "minTermSize" MinSize
1291 :> QueryParam "maxTermSize" MaxSize
1292 :> QueryParam "orderBy" OrderBy
1293 :> QueryParam "search" Text
1294 :> Get '[JSON] (Versioned NgramsTable)
1295
1296 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1297 :> QueryParamR "ngramsType" TabType
1298 :> QueryParamR "list" ListId
1299 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1300 :> Put '[JSON] (Versioned NgramsTablePatch)
1301
1302 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1303 :> QueryParamR "ngramsType" TabType
1304 :> QueryParamR "list" ListId
1305 :> "recompute" :> Post '[JSON] Int
1306
1307 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1308 :> QueryParamR "ngramsType" TabType
1309 :> QueryParamR "list" ListId
1310 :> Get '[JSON] Version
1311
1312 type TableNgramsApi = TableNgramsApiGet
1313 :<|> TableNgramsApiPut
1314 :<|> RecomputeScoresNgramsApiGet
1315 :<|> "version" :> TableNgramsApiGetVersion
1316
1317 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1318 => NodeId
1319 -> TabType
1320 -> ListId
1321 -> Limit
1322 -> Maybe Offset
1323 -> Maybe ListType
1324 -> Maybe MinSize -> Maybe MaxSize
1325 -> Maybe OrderBy
1326 -> Maybe Text -- full text search
1327 -> m (Versioned NgramsTable)
1328 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1329 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1330 where
1331 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
1332
1333 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1334 => NodeId
1335 -> TabType
1336 -> ListId
1337 -> m Version
1338 getTableNgramsVersion _nId _tabType _listId = currentVersion
1339 -- TODO: limit?
1340 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1341 -- This line above looks like a waste of computation to finally get only the version.
1342 -- See the comment about listNgramsChangedSince.
1343
1344
1345 -- | Text search is deactivated for now for ngrams by doc only
1346 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1347 => DocId -> TabType
1348 -> ListId -> Limit -> Maybe Offset
1349 -> Maybe ListType
1350 -> Maybe MinSize -> Maybe MaxSize
1351 -> Maybe OrderBy
1352 -> Maybe Text -- full text search
1353 -> m (Versioned NgramsTable)
1354 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1355 ns <- selectNodesWithUsername NodeList userMaster
1356 let ngramsType = ngramsTypeFromTabType tabType
1357 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1358 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
1359 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1360
1361
1362
1363 apiNgramsTableCorpus :: ( RepoCmdM env err m
1364 , HasNodeError err
1365 , HasInvalidError err
1366 , HasConnectionPool env
1367 , HasConfig env
1368 )
1369 => NodeId -> ServerT TableNgramsApi m
1370 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1371 :<|> tableNgramsPut
1372 :<|> scoresRecomputeTableNgrams cId
1373 :<|> getTableNgramsVersion cId
1374
1375 apiNgramsTableDoc :: ( RepoCmdM env err m
1376 , HasNodeError err
1377 , HasInvalidError err
1378 , HasConnectionPool env
1379 , HasConfig env
1380 )
1381 => DocId -> ServerT TableNgramsApi m
1382 apiNgramsTableDoc dId = getTableNgramsDoc dId
1383 :<|> tableNgramsPut
1384 :<|> scoresRecomputeTableNgrams dId
1385 :<|> getTableNgramsVersion dId
1386 -- > index all the corpus accordingly (TODO AD)
1387
1388 -- Did the given list of ngrams changed since the given version?
1389 -- The returned value is versioned boolean value, meaning that one always retrieve the
1390 -- latest version.
1391 -- If the given version is negative then one simply receive the latest version and True.
1392 -- Using this function is more precise than simply comparing the latest version number
1393 -- with the local version number. Indeed there might be no change to this particular list
1394 -- and still the version number has changed because of other lists.
1395 --
1396 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
1397 -- * currentVersion: good computation, good bandwidth, bad precision.
1398 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
1399 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
1400 listNgramsChangedSince :: RepoCmdM env err m
1401 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1402 listNgramsChangedSince listId ngramsType version
1403 | version < 0 =
1404 Versioned <$> currentVersion <*> pure True
1405 | otherwise =
1406 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1407
1408 -- Instances
1409 instance Arbitrary NgramsRepoElement where
1410 arbitrary = elements $ map ngramsElementToRepo ns
1411 where
1412 NgramsTable ns = mockTable
1413
1414 --{-
1415 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1416 where
1417 parseUrlPiece x = maybeToEither x (decode $ cs x)