]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[TextFlow] MapList building, score needs normalization
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
25
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , putListNgrams
35 --, putListNgrams'
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsStatePatch
40 , NgramsTablePatch
41 , NgramsTableMap
42
43 , NgramsTerm(..)
44
45 , NgramsElement(..)
46 , mkNgramsElement
47 , mergeNgramsElement
48
49 , RootParent(..)
50
51 , MSet
52 , mSetFromList
53 , mSetToList
54
55 , Repo(..)
56 , r_version
57 , r_state
58 , r_history
59 , NgramsRepo
60 , NgramsRepoElement(..)
61 , saveRepo
62 , initRepo
63
64 , RepoEnv(..)
65 , renv_var
66 , renv_lock
67
68 , TabType(..)
69 , ngramsTypeFromTabType
70
71 , HasRepoVar(..)
72 , HasRepoSaver(..)
73 , HasRepo(..)
74 , RepoCmdM
75 , QueryParamR
76 , TODO
77
78 -- Internals
79 , getNgramsTableMap
80 , dumpJsonTableMap
81 , tableNgramsPull
82 , tableNgramsPut
83
84 , Version
85 , Versioned(..)
86 , currentVersion
87 , listNgramsChangedSince
88 )
89 where
90
91 import Codec.Serialise (Serialise())
92 import Control.Category ((>>>))
93 import Control.Concurrent
94 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
95 import Control.Monad.Error.Class (MonadError)
96 import Control.Monad.Reader
97 import Control.Monad.State
98 import Control.Monad.Trans.Control (MonadBaseControl)
99 import Data.Aeson hiding ((.=))
100 import Data.Aeson.TH (deriveJSON)
101 import qualified Data.Aeson.Text as DAT
102 import Data.Either (Either(..))
103 import Data.Foldable
104 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
105 import qualified Data.List as List
106 import Data.Map.Strict (Map)
107 import qualified Data.Map.Strict as Map
108 import qualified Data.Map.Strict.Patch as PM
109 import Data.Maybe (fromMaybe)
110 import Data.Monoid
111 import Data.Ord (Down(..))
112 import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..),
113 PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours,
114 MaybePatch(Mod), unMod, old, new)
115 import Data.Set (Set)
116 import qualified Data.Set as S
117 import qualified Data.Set as Set
118 import Data.String (IsString, fromString)
119 import Data.Swagger hiding (version, patch)
120 import Data.Text (Text, isInfixOf, pack, strip, unpack)
121 import Data.Text.Lazy.IO as DTL
122 import Data.Validity
123 import Database.PostgreSQL.Simple.FromField (FromField, fromField, ResultError(ConversionFailed), returnError)
124 import Formatting (hprint, int, (%))
125 import Formatting.Clock (timeSpecs)
126 import GHC.Generics (Generic)
127 import Servant hiding (Patch)
128 import System.Clock (getTime, TimeSpec, Clock(..))
129 import System.FileLock (FileLock)
130 import System.IO (stderr)
131 import Test.QuickCheck (elements, frequency)
132 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
133
134 import Prelude (error)
135 import Protolude (maybeToEither)
136 import Gargantext.Prelude
137
138 import Gargantext.Core.Text (size)
139 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
140 import Gargantext.Core.Types (TODO)
141 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
142 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
143 import Gargantext.Database.Query.Table.Node.Select
144 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
145 import Gargantext.Database.Admin.Config (userMaster)
146 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
147 import Gargantext.Database.Admin.Types.Node (NodeType(..))
148 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
149 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
150
151 ------------------------------------------------------------------------
152 --data FacetFormat = Table | Chart
153 data TabType = Docs | Trash | MoreFav | MoreTrash
154 | Terms | Sources | Authors | Institutes
155 | Contacts
156 deriving (Generic, Enum, Bounded, Show)
157
158 instance FromHttpApiData TabType
159 where
160 parseUrlPiece "Docs" = pure Docs
161 parseUrlPiece "Trash" = pure Trash
162 parseUrlPiece "MoreFav" = pure MoreFav
163 parseUrlPiece "MoreTrash" = pure MoreTrash
164
165 parseUrlPiece "Terms" = pure Terms
166 parseUrlPiece "Sources" = pure Sources
167 parseUrlPiece "Institutes" = pure Institutes
168 parseUrlPiece "Authors" = pure Authors
169
170 parseUrlPiece "Contacts" = pure Contacts
171
172 parseUrlPiece _ = Left "Unexpected value of TabType"
173
174 instance ToParamSchema TabType
175 instance ToJSON TabType
176 instance FromJSON TabType
177 instance ToSchema TabType
178 instance Arbitrary TabType
179 where
180 arbitrary = elements [minBound .. maxBound]
181
182 newtype MSet a = MSet (Map a ())
183 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
184
185 instance ToJSON a => ToJSON (MSet a) where
186 toJSON (MSet m) = toJSON (Map.keys m)
187 toEncoding (MSet m) = toEncoding (Map.keys m)
188
189 mSetFromSet :: Set a -> MSet a
190 mSetFromSet = MSet . Map.fromSet (const ())
191
192 mSetFromList :: Ord a => [a] -> MSet a
193 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
194
195 -- mSetToSet :: Ord a => MSet a -> Set a
196 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
197 mSetToSet :: Ord a => MSet a -> Set a
198 mSetToSet = Set.fromList . mSetToList
199
200 mSetToList :: MSet a -> [a]
201 mSetToList (MSet a) = Map.keys a
202
203 instance Foldable MSet where
204 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
205
206 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
207 parseJSON = fmap mSetFromList . parseJSON
208
209 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
210 -- TODO
211 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
212
213 ------------------------------------------------------------------------
214 newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
215 deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema)
216
217 instance FromJSONKey NgramsTerm where
218 fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
219
220 instance IsString NgramsTerm where
221 fromString s = NgramsTerm $ pack s
222
223 instance FromField NgramsTerm
224 where
225 fromField field mb = do
226 v <- fromField field mb
227 case fromJSON v of
228 Success a -> pure $ NgramsTerm $ strip a
229 Error _err -> returnError ConversionFailed field
230 $ List.intercalate " " [ "cannot parse hyperdata for JSON: "
231 , show v
232 ]
233
234 data RootParent = RootParent
235 { _rp_root :: NgramsTerm
236 , _rp_parent :: NgramsTerm
237 }
238 deriving (Ord, Eq, Show, Generic)
239
240 deriveJSON (unPrefix "_rp_") ''RootParent
241 makeLenses ''RootParent
242
243 data NgramsRepoElement = NgramsRepoElement
244 { _nre_size :: Int
245 , _nre_list :: ListType
246 --, _nre_root_parent :: Maybe RootParent
247 , _nre_root :: Maybe NgramsTerm
248 , _nre_parent :: Maybe NgramsTerm
249 , _nre_children :: MSet NgramsTerm
250 }
251 deriving (Ord, Eq, Show, Generic)
252
253 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
254 -- TODO
255 -- if ngrams & not size => size
256 -- drop occurrences
257
258 makeLenses ''NgramsRepoElement
259
260 instance ToSchema NgramsRepoElement where
261 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
262
263 instance Serialise (MSet NgramsTerm)
264 instance Serialise NgramsRepoElement
265
266 data NgramsElement =
267 NgramsElement { _ne_ngrams :: NgramsTerm
268 , _ne_size :: Int
269 , _ne_list :: ListType
270 , _ne_occurrences :: Int
271 , _ne_root :: Maybe NgramsTerm
272 , _ne_parent :: Maybe NgramsTerm
273 , _ne_children :: MSet NgramsTerm
274 }
275 deriving (Ord, Eq, Show, Generic)
276
277 deriveJSON (unPrefix "_ne_") ''NgramsElement
278 makeLenses ''NgramsElement
279
280 mkNgramsElement :: NgramsTerm
281 -> ListType
282 -> Maybe RootParent
283 -> MSet NgramsTerm
284 -> NgramsElement
285 mkNgramsElement ngrams list rp children =
286 NgramsElement ngrams (size (unNgramsTerm ngrams)) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
287
288 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
289 newNgramsElement mayList ngrams =
290 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
291
292 instance ToSchema NgramsElement where
293 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
294 instance Arbitrary NgramsElement where
295 arbitrary = elements [newNgramsElement Nothing "sport"]
296
297 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
298 ngramsElementToRepo
299 (NgramsElement { _ne_size = s
300 , _ne_list = l
301 , _ne_root = r
302 , _ne_parent = p
303 , _ne_children = c
304 }) =
305 NgramsRepoElement
306 { _nre_size = s
307 , _nre_list = l
308 , _nre_parent = p
309 , _nre_root = r
310 , _nre_children = c
311 }
312
313 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
314 ngramsElementFromRepo
315 ngrams
316 (NgramsRepoElement
317 { _nre_size = s
318 , _nre_list = l
319 , _nre_parent = p
320 , _nre_root = r
321 , _nre_children = c
322 }) =
323 NgramsElement { _ne_size = s
324 , _ne_list = l
325 , _ne_root = r
326 , _ne_parent = p
327 , _ne_children = c
328 , _ne_ngrams = ngrams
329 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
330 {-
331 -- Here we could use 0 if we want to avoid any `panic`.
332 -- It will not happen using getTableNgrams if
333 -- getOccByNgramsOnly provides a count of occurrences for
334 -- all the ngrams given.
335 -}
336 }
337
338 ------------------------------------------------------------------------
339 newtype NgramsTable = NgramsTable [NgramsElement]
340 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
341
342 type NgramsList = NgramsTable
343
344 makePrisms ''NgramsTable
345
346 -- | Question: why these repetition of Type in this instance
347 -- may you document it please ?
348 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
349 each = _NgramsTable . each
350
351 -- TODO discuss
352 -- | TODO Check N and Weight
353 {-
354 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
355 toNgramsElement ns = map toNgramsElement' ns
356 where
357 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
358 where
359 p' = case p of
360 Nothing -> Nothing
361 Just x -> lookup x mapParent
362 c' = maybe mempty identity $ lookup t mapChildren
363 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
364
365 mapParent :: Map Int Text
366 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
367
368 mapChildren :: Map Text (Set Text)
369 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
370 $ Map.fromListWith (<>)
371 $ map (first fromJust)
372 $ filter (isJust . fst)
373 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
374 -}
375
376 mockTable :: NgramsTable
377 mockTable = NgramsTable
378 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
379 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
380 , mkNgramsElement "cats" StopTerm Nothing mempty
381 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
382 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
383 , mkNgramsElement "fox" MapTerm Nothing mempty
384 , mkNgramsElement "object" CandidateTerm Nothing mempty
385 , mkNgramsElement "nothing" StopTerm Nothing mempty
386 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
387 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
388 , mkNgramsElement "moon" CandidateTerm Nothing mempty
389 , mkNgramsElement "sky" StopTerm Nothing mempty
390 ]
391 where
392 rp n = Just $ RootParent n n
393
394 instance Arbitrary NgramsTable where
395 arbitrary = pure mockTable
396
397 instance ToSchema NgramsTable
398
399 ------------------------------------------------------------------------
400 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
401 ------------------------------------------------------------------------
402 -- On the Client side:
403 --data Action = InGroup NgramsId NgramsId
404 -- | OutGroup NgramsId NgramsId
405 -- | SetListType NgramsId ListType
406
407 data PatchSet a = PatchSet
408 { _rem :: Set a
409 , _add :: Set a
410 }
411 deriving (Eq, Ord, Show, Generic)
412
413 makeLenses ''PatchSet
414 makePrisms ''PatchSet
415
416 instance ToJSON a => ToJSON (PatchSet a) where
417 toJSON = genericToJSON $ unPrefix "_"
418 toEncoding = genericToEncoding $ unPrefix "_"
419
420 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
421 parseJSON = genericParseJSON $ unPrefix "_"
422
423 {-
424 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
425 arbitrary = PatchSet <$> arbitrary <*> arbitrary
426
427 type instance Patched (PatchSet a) = Set a
428
429 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
430 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
431
432 instance Ord a => Semigroup (PatchSet a) where
433 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
434 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
435 } -- TODO Review
436
437 instance Ord a => Monoid (PatchSet a) where
438 mempty = PatchSet mempty mempty
439
440 instance Ord a => Group (PatchSet a) where
441 invert (PatchSet r a) = PatchSet a r
442
443 instance Ord a => Composable (PatchSet a) where
444 composable _ _ = undefined
445
446 instance Ord a => Action (PatchSet a) (Set a) where
447 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
448
449 instance Applicable (PatchSet a) (Set a) where
450 applicable _ _ = mempty
451
452 instance Ord a => Validity (PatchSet a) where
453 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
454
455 instance Ord a => Transformable (PatchSet a) where
456 transformable = undefined
457
458 conflicts _p _q = undefined
459
460 transformWith conflict p q = undefined conflict p q
461
462 instance ToSchema a => ToSchema (PatchSet a)
463 -}
464
465 type AddRem = Replace (Maybe ())
466
467 instance Serialise AddRem
468
469 remPatch, addPatch :: AddRem
470 remPatch = replace (Just ()) Nothing
471 addPatch = replace Nothing (Just ())
472
473 isRem :: Replace (Maybe ()) -> Bool
474 isRem = (== remPatch)
475
476 type PatchMap = PM.PatchMap
477
478
479 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
480 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
481 Transformable, Composable)
482
483 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
484 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
485
486 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
487 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
488
489 -- TODO this breaks module abstraction
490 makePrisms ''PM.PatchMap
491
492 makePrisms ''PatchMSet
493
494 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
495 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
496 where
497 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
498 f = Map.partition isRem >>> both %~ Map.keysSet
499
500 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
501 g (rems, adds) = Map.fromSet (const remPatch) rems
502 <> Map.fromSet (const addPatch) adds
503
504 instance Ord a => Action (PatchMSet a) (MSet a) where
505 act (PatchMSet p) (MSet m) = MSet $ act p m
506
507 instance Ord a => Applicable (PatchMSet a) (MSet a) where
508 applicable (PatchMSet p) (MSet m) = applicable p m
509
510 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
511 toJSON = toJSON . view _PatchMSetIso
512 toEncoding = toEncoding . view _PatchMSetIso
513
514 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
515 parseJSON = fmap (_PatchMSetIso #) . parseJSON
516
517 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
518 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
519
520 instance ToSchema a => ToSchema (PatchMSet a) where
521 -- TODO
522 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
523
524 type instance Patched (PatchMSet a) = MSet a
525
526 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
527 arbitrary = uncurry replace <$> arbitrary
528 -- If they happen to be equal then the patch is Keep.
529
530 instance ToSchema a => ToSchema (Replace a) where
531 declareNamedSchema (_ :: Proxy (Replace a)) = do
532 -- TODO Keep constructor is not supported here.
533 aSchema <- declareSchemaRef (Proxy :: Proxy a)
534 return $ NamedSchema (Just "Replace") $ mempty
535 & type_ ?~ SwaggerObject
536 & properties .~
537 InsOrdHashMap.fromList
538 [ ("old", aSchema)
539 , ("new", aSchema)
540 ]
541 & required .~ [ "old", "new" ]
542
543 data NgramsPatch
544 = NgramsPatch { _patch_children :: PatchMSet NgramsTerm
545 , _patch_list :: Replace ListType -- TODO Map UserId ListType
546 }
547 | NgramsReplace { _patch_old :: Maybe NgramsRepoElement
548 , _patch_new :: Maybe NgramsRepoElement
549 }
550 deriving (Eq, Show, Generic)
551
552 -- The JSON encoding is untagged, this is OK since the field names are disjoints and thus the encoding is unambiguous.
553 -- TODO: the empty object should be accepted and treated as mempty.
554 deriveJSON (unPrefixUntagged "_") ''NgramsPatch
555 makeLenses ''NgramsPatch
556
557 -- TODO: This instance is simplified since we should either have the fields children and/or list
558 -- or the fields old and/or new.
559 instance ToSchema NgramsPatch where
560 declareNamedSchema _ = do
561 childrenSch <- declareSchemaRef (Proxy :: Proxy (PatchMSet NgramsTerm))
562 listSch <- declareSchemaRef (Proxy :: Proxy (Replace ListType))
563 nreSch <- declareSchemaRef (Proxy :: Proxy NgramsRepoElement)
564 return $ NamedSchema (Just "NgramsPatch") $ mempty
565 & type_ ?~ SwaggerObject
566 & properties .~
567 InsOrdHashMap.fromList
568 [ ("children", childrenSch)
569 , ("list", listSch)
570 , ("old", nreSch)
571 , ("new", nreSch)
572 ]
573
574 instance Arbitrary NgramsPatch where
575 arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
576 , (1, NgramsReplace <$> arbitrary <*> arbitrary)
577 ]
578
579 instance Serialise NgramsPatch
580 instance Serialise (Replace ListType)
581
582 instance Serialise ListType
583
584 type NgramsPatchIso =
585 MaybePatch NgramsRepoElement (PairPatch (PatchMSet NgramsTerm) (Replace ListType))
586
587 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
588 _NgramsPatch = iso unwrap wrap
589 where
590 unwrap (NgramsPatch c l) = Mod $ PairPatch (c, l)
591 unwrap (NgramsReplace o n) = replace o n
592 wrap x =
593 case unMod x of
594 Just (PairPatch (c, l)) -> NgramsPatch c l
595 Nothing -> NgramsReplace (x ^? old . _Just) (x ^? new . _Just)
596
597 instance Semigroup NgramsPatch where
598 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
599
600 instance Monoid NgramsPatch where
601 mempty = _NgramsPatch # mempty
602
603 instance Validity NgramsPatch where
604 validate p = p ^. _NgramsPatch . to validate
605
606 instance Transformable NgramsPatch where
607 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
608
609 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
610
611 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
612 where
613 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
614
615 type ConflictResolutionNgramsPatch =
616 ( ConflictResolutionReplace (Maybe NgramsRepoElement)
617 , ( ConflictResolutionPatchMSet NgramsTerm
618 , ConflictResolutionReplace ListType
619 )
620 , (Bool, Bool)
621 )
622 type instance ConflictResolution NgramsPatch =
623 ConflictResolutionNgramsPatch
624
625 type PatchedNgramsPatch = Maybe NgramsRepoElement
626 type instance Patched NgramsPatch = PatchedNgramsPatch
627
628 instance Applicable (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
629 applicable (PairPatch (c, l)) n = applicable c (n ^. nre_children) <> applicable l (n ^. nre_list)
630
631 instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
632 act (PairPatch (c, l)) = (nre_children %~ act c)
633 . (nre_list %~ act l)
634
635 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
636 applicable p = applicable (p ^. _NgramsPatch)
637
638 instance Action NgramsPatch (Maybe NgramsRepoElement) where
639 act p = act (p ^. _NgramsPatch)
640
641 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
642 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
643
644 instance Serialise NgramsTablePatch
645 instance Serialise (PatchMap NgramsTerm NgramsPatch)
646
647 instance FromField NgramsTablePatch
648 where
649 fromField = fromField'
650
651 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
652 where
653 fromField = fromField'
654
655 type instance ConflictResolution NgramsTablePatch =
656 NgramsTerm -> ConflictResolutionNgramsPatch
657
658 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
659 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
660 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
661
662 makePrisms ''NgramsTablePatch
663 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
664 instance ToSchema NgramsTablePatch
665
666 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
667 applicable p = applicable (p ^. _NgramsTablePatch)
668
669 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
670 act p =
671 fmap (execState (reParentNgramsTablePatch p)) .
672 act (p ^. _NgramsTablePatch)
673
674 instance Arbitrary NgramsTablePatch where
675 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
676
677 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
678 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
679 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
680
681 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
682
683 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
684 reRootChildren root ngram = do
685 nre <- use $ at ngram
686 forOf_ (_Just . nre_children . folded) nre $ \child -> do
687 at child . _Just . nre_root ?= root
688 reRootChildren root child
689
690 reParent :: Maybe RootParent -> ReParent NgramsTerm
691 reParent rp child = do
692 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
693 . (nre_root .~ (_rp_root <$> rp))
694 )
695 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
696
697 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
698 reParentAddRem rp child p =
699 reParent (if isRem p then Nothing else Just rp) child
700
701 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
702 reParentNgramsPatch parent ngramsPatch = do
703 root_of_parent <- use (at parent . _Just . nre_root)
704 let
705 root = fromMaybe parent root_of_parent
706 rp = RootParent { _rp_root = root, _rp_parent = parent }
707 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
708 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
709
710 reParentNgramsTablePatch :: ReParent NgramsTablePatch
711 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
712 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
713
714 ------------------------------------------------------------------------
715 ------------------------------------------------------------------------
716 type Version = Int
717
718 data Versioned a = Versioned
719 { _v_version :: Version
720 , _v_data :: a
721 }
722 deriving (Generic, Show, Eq)
723 deriveJSON (unPrefix "_v_") ''Versioned
724 makeLenses ''Versioned
725 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
726 declareNamedSchema = wellNamedSchema "_v_"
727 instance Arbitrary a => Arbitrary (Versioned a) where
728 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
729
730
731 {-
732 -- TODO sequences of modifications (Patchs)
733 type NgramsIdPatch = Patch NgramsId NgramsPatch
734
735 ngramsPatch :: Int -> NgramsPatch
736 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
737
738 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
739 toEdit n p = Edit n p
740 ngramsIdPatch :: Patch NgramsId NgramsPatch
741 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
742 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
743 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
744 ]
745
746 -- applyPatchBack :: Patch -> IO Patch
747 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
748 -}
749 ------------------------------------------------------------------------
750 ------------------------------------------------------------------------
751 ------------------------------------------------------------------------
752
753 {-
754 -- TODO: Replace.old is ignored which means that if the current list
755 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
756 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
757 -- However this should not happen in non conflicting situations.
758 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
759 mkListsUpdate nt patches =
760 [ (ngramsTypeId nt, ng, listTypeId lt)
761 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
762 , lt <- patch ^.. patch_list . new
763 ]
764
765 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
766 -> NgramsType
767 -> NgramsTablePatch
768 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
769 mkChildrenGroups addOrRem nt patches =
770 [ (ngramsTypeId nt, parent, child)
771 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
772 , child <- patch ^.. patch_children . to addOrRem . folded
773 ]
774 -}
775
776 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
777 ngramsTypeFromTabType tabType =
778 let lieu = "Garg.API.Ngrams: " :: Text in
779 case tabType of
780 Sources -> TableNgrams.Sources
781 Authors -> TableNgrams.Authors
782 Institutes -> TableNgrams.Institutes
783 Terms -> TableNgrams.NgramsTerms
784 _ -> panic $ lieu <> "No Ngrams for this tab"
785 -- TODO: This `panic` would disapear with custom NgramsType.
786
787 ------------------------------------------------------------------------
788 data Repo s p = Repo
789 { _r_version :: Version
790 , _r_state :: s
791 , _r_history :: [p]
792 -- first patch in the list is the most recent
793 }
794 deriving (Generic)
795
796 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
797 parseJSON = genericParseJSON $ unPrefix "_r_"
798
799 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
800 toJSON = genericToJSON $ unPrefix "_r_"
801 toEncoding = genericToEncoding $ unPrefix "_r_"
802
803 instance (Serialise s, Serialise p) => Serialise (Repo s p)
804
805 makeLenses ''Repo
806
807 initRepo :: Monoid s => Repo s p
808 initRepo = Repo 1 mempty []
809
810 type NgramsRepo = Repo NgramsState NgramsStatePatch
811 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
812 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
813
814 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
815 instance Serialise NgramsStatePatch
816
817 initMockRepo :: NgramsRepo
818 initMockRepo = Repo 1 s []
819 where
820 s = Map.singleton TableNgrams.NgramsTerms
821 $ Map.singleton 47254
822 $ Map.fromList
823 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
824
825 data RepoEnv = RepoEnv
826 { _renv_var :: !(MVar NgramsRepo)
827 , _renv_saver :: !(IO ())
828 , _renv_lock :: !FileLock
829 }
830 deriving (Generic)
831
832 makeLenses ''RepoEnv
833
834 class HasRepoVar env where
835 repoVar :: Getter env (MVar NgramsRepo)
836
837 instance HasRepoVar (MVar NgramsRepo) where
838 repoVar = identity
839
840 class HasRepoSaver env where
841 repoSaver :: Getter env (IO ())
842
843 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
844 repoEnv :: Getter env RepoEnv
845
846 instance HasRepo RepoEnv where
847 repoEnv = identity
848
849 instance HasRepoVar RepoEnv where
850 repoVar = renv_var
851
852 instance HasRepoSaver RepoEnv where
853 repoSaver = renv_saver
854
855 type RepoCmdM env err m =
856 ( MonadReader env m
857 , MonadError err m
858 , MonadBaseControl IO m
859 , HasRepo env
860 )
861 ------------------------------------------------------------------------
862
863 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
864 => m ()
865 saveRepo = liftBase =<< view repoSaver
866
867 listTypeConflictResolution :: ListType -> ListType -> ListType
868 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
869
870 ngramsStatePatchConflictResolution
871 :: TableNgrams.NgramsType
872 -> NodeId
873 -> NgramsTerm
874 -> ConflictResolutionNgramsPatch
875 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
876 = (ours, (const ours, ours), (False, False))
877 -- (False, False) mean here that Mod has always priority.
878 -- (True, False) <- would mean priority to the left (same as ours).
879
880 -- undefined {- TODO think this through -}, listTypeConflictResolution)
881
882 -- Current state:
883 -- Insertions are not considered as patches,
884 -- they do not extend history,
885 -- they do not bump version.
886 insertNewOnly :: a -> Maybe b -> a
887 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
888 -- TODO error handling
889
890 something :: Monoid a => Maybe a -> a
891 something Nothing = mempty
892 something (Just a) = a
893
894 {- unused
895 -- TODO refactor with putListNgrams
896 copyListNgrams :: RepoCmdM env err m
897 => NodeId -> NodeId -> NgramsType
898 -> m ()
899 copyListNgrams srcListId dstListId ngramsType = do
900 var <- view repoVar
901 liftBase $ modifyMVar_ var $
902 pure . (r_state . at ngramsType %~ (Just . f . something))
903 saveRepo
904 where
905 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
906 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
907
908 -- TODO refactor with putListNgrams
909 -- The list must be non-empty!
910 -- The added ngrams must be non-existent!
911 addListNgrams :: RepoCmdM env err m
912 => NodeId -> NgramsType
913 -> [NgramsElement] -> m ()
914 addListNgrams listId ngramsType nes = do
915 var <- view repoVar
916 liftBase $ modifyMVar_ var $
917 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
918 saveRepo
919 where
920 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
921 -}
922
923 -- UNSAFE
924 rmListNgrams :: RepoCmdM env err m
925 => ListId
926 -> TableNgrams.NgramsType
927 -> m ()
928 rmListNgrams l nt = setListNgrams l nt mempty
929
930 -- | TODO: incr the Version number
931 -- && should use patch
932 -- UNSAFE
933 setListNgrams :: RepoCmdM env err m
934 => NodeId
935 -> TableNgrams.NgramsType
936 -> Map NgramsTerm NgramsRepoElement
937 -> m ()
938 setListNgrams listId ngramsType ns = do
939 var <- view repoVar
940 liftBase $ modifyMVar_ var $
941 pure . ( r_state
942 . at ngramsType %~
943 (Just .
944 (at listId .~ ( Just ns))
945 . something
946 )
947 )
948 saveRepo
949
950 -- NOTE
951 -- This is no longer part of the API.
952 -- This function is maintained for its usage in Database.Action.Flow.List.
953 -- If the given list of ngrams elements contains ngrams already in
954 -- the repo, they will be ignored.
955 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
956 => NodeId
957 -> TableNgrams.NgramsType
958 -> [NgramsElement]
959 -> m ()
960 putListNgrams _ _ [] = pure ()
961 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
962 where
963 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
964
965 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
966 => NodeId
967 -> TableNgrams.NgramsType
968 -> Map NgramsTerm NgramsRepoElement
969 -> m ()
970 putListNgrams' nodeId ngramsType ns = do
971 -- printDebug "[putListNgrams'] nodeId" nodeId
972 -- printDebug "[putListNgrams'] ngramsType" ngramsType
973 -- printDebug "[putListNgrams'] ns" ns
974
975 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
976 (p0, p0_validity) = PM.singleton nodeId p1
977 (p, p_validity) = PM.singleton ngramsType p0
978 assertValid p0_validity
979 assertValid p_validity
980 {-
981 -- TODO
982 v <- currentVersion
983 q <- commitStatePatch (Versioned v p)
984 assert empty q
985 -- What if another commit comes in between?
986 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
987 -- The modifyMVar_ would test the patch with applicable first.
988 -- If valid the rest would be atomic and no merge is required.
989 -}
990 var <- view repoVar
991 liftBase $ modifyMVar_ var $ \r -> do
992 pure $ r & r_version +~ 1
993 & r_history %~ (p :)
994 & r_state . at ngramsType %~
995 (Just .
996 (at nodeId %~
997 ( Just
998 . (<> ns)
999 . something
1000 )
1001 )
1002 . something
1003 )
1004 saveRepo
1005
1006
1007 currentVersion :: RepoCmdM env err m
1008 => m Version
1009 currentVersion = do
1010 var <- view repoVar
1011 r <- liftBase $ readMVar var
1012 pure $ r ^. r_version
1013
1014
1015 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1016 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
1017 commitStatePatch (Versioned p_version p) = do
1018 var <- view repoVar
1019 vq' <- liftBase $ modifyMVar var $ \r -> do
1020 let
1021 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1022 (p', q') = transformWith ngramsStatePatchConflictResolution p q
1023 r' = r & r_version +~ 1
1024 & r_state %~ act p'
1025 & r_history %~ (p' :)
1026 {-
1027 -- Ideally we would like to check these properties. However:
1028 -- * They should be checked only to debug the code. The client data
1029 -- should be able to trigger these.
1030 -- * What kind of error should they throw (we are in IO here)?
1031 -- * Should we keep modifyMVar?
1032 -- * Should we throw the validation in an Exception, catch it around
1033 -- modifyMVar and throw it back as an Error?
1034 assertValid $ transformable p q
1035 assertValid $ applicable p' (r ^. r_state)
1036 -}
1037 pure (r', Versioned (r' ^. r_version) q')
1038
1039 saveRepo
1040 pure vq'
1041
1042 -- This is a special case of tableNgramsPut where the input patch is empty.
1043 tableNgramsPull :: RepoCmdM env err m
1044 => ListId
1045 -> TableNgrams.NgramsType
1046 -> Version
1047 -> m (Versioned NgramsTablePatch)
1048 tableNgramsPull listId ngramsType p_version = do
1049 var <- view repoVar
1050 r <- liftBase $ readMVar var
1051
1052 let
1053 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1054 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
1055
1056 pure (Versioned (r ^. r_version) q_table)
1057
1058 -- Apply the given patch to the DB and returns the patch to be applied on the
1059 -- client.
1060 -- TODO-ACCESS check
1061 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1062 => TabType
1063 -> ListId
1064 -> Versioned NgramsTablePatch
1065 -> m (Versioned NgramsTablePatch)
1066 tableNgramsPut tabType listId (Versioned p_version p_table)
1067 | p_table == mempty = do
1068 let ngramsType = ngramsTypeFromTabType tabType
1069 tableNgramsPull listId ngramsType p_version
1070
1071 | otherwise = do
1072 let ngramsType = ngramsTypeFromTabType tabType
1073 (p0, p0_validity) = PM.singleton listId p_table
1074 (p, p_validity) = PM.singleton ngramsType p0
1075
1076 assertValid p0_validity
1077 assertValid p_validity
1078
1079 commitStatePatch (Versioned p_version p)
1080 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
1081
1082 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1083 mergeNgramsElement _neOld neNew = neNew
1084 {-
1085 { _ne_list :: ListType
1086 If we merge the parents/children we can potentially create cycles!
1087 , _ne_parent :: Maybe NgramsTerm
1088 , _ne_children :: MSet NgramsTerm
1089 }
1090 -}
1091
1092 getNgramsTableMap :: RepoCmdM env err m
1093 => NodeId
1094 -> TableNgrams.NgramsType
1095 -> m (Versioned NgramsTableMap)
1096 getNgramsTableMap nodeId ngramsType = do
1097 v <- view repoVar
1098 repo <- liftBase $ readMVar v
1099 pure $ Versioned (repo ^. r_version)
1100 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1101
1102 dumpJsonTableMap :: RepoCmdM env err m
1103 => Text
1104 -> NodeId
1105 -> TableNgrams.NgramsType
1106 -> m ()
1107 dumpJsonTableMap fpath nodeId ngramsType = do
1108 m <- getNgramsTableMap nodeId ngramsType
1109 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
1110 pure ()
1111
1112 type MinSize = Int
1113 type MaxSize = Int
1114
1115 -- | TODO Errors management
1116 -- TODO: polymorphic for Annuaire or Corpus or ...
1117 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1118 -- TODO: should take only one ListId
1119
1120 getTime' :: MonadBase IO m => m TimeSpec
1121 getTime' = liftBase $ getTime ProcessCPUTime
1122
1123
1124 getTableNgrams :: forall env err m.
1125 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1126 => NodeType -> NodeId -> TabType
1127 -> ListId -> Limit -> Maybe Offset
1128 -> Maybe ListType
1129 -> Maybe MinSize -> Maybe MaxSize
1130 -> Maybe OrderBy
1131 -> (NgramsTerm -> Bool)
1132 -> m (Versioned NgramsTable)
1133 getTableNgrams _nType nId tabType listId limit_ offset
1134 listType minSize maxSize orderBy searchQuery = do
1135
1136 t0 <- getTime'
1137 -- lIds <- selectNodesWithUsername NodeList userMaster
1138 let
1139 ngramsType = ngramsTypeFromTabType tabType
1140 offset' = maybe 0 identity offset
1141 listType' = maybe (const True) (==) listType
1142 minSize' = maybe (const True) (<=) minSize
1143 maxSize' = maybe (const True) (>=) maxSize
1144
1145 selected_node n = minSize' s
1146 && maxSize' s
1147 && searchQuery (n ^. ne_ngrams)
1148 && listType' (n ^. ne_list)
1149 where
1150 s = n ^. ne_size
1151
1152 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1153
1154 ---------------------------------------
1155 sortOnOrder Nothing = identity
1156 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1157 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1158 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1159 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1160
1161 ---------------------------------------
1162 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1163 selectAndPaginate tableMap = roots <> inners
1164 where
1165 list = tableMap ^.. each
1166 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1167 (ne ^. ne_root)
1168 selected_nodes = list & take limit_
1169 . drop offset'
1170 . filter selected_node
1171 . sortOnOrder orderBy
1172 roots = rootOf <$> selected_nodes
1173 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1174 inners = list & filter (selected_inner rootsSet)
1175
1176 ---------------------------------------
1177 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1178 setScores False table = pure table
1179 setScores True table = do
1180 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
1181 t1 <- getTime'
1182 occurrences <- getOccByNgramsOnlyFast' nId
1183 listId
1184 ngramsType
1185 ngrams_terms
1186 t2 <- getTime'
1187 liftBase $ hprint stderr
1188 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1189 (length ngrams_terms) t1 t2
1190 {-
1191 occurrences <- getOccByNgramsOnlySlow nType nId
1192 (lIds <> [listId])
1193 ngramsType
1194 ngrams_terms
1195 -}
1196 let
1197 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
1198
1199 pure $ table & each %~ setOcc
1200 ---------------------------------------
1201
1202 -- lists <- catMaybes <$> listsWith userMaster
1203 -- trace (show lists) $
1204 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1205
1206 let scoresNeeded = needsScores orderBy
1207 tableMap1 <- getNgramsTableMap listId ngramsType
1208 t1 <- getTime'
1209 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1210 . Map.mapWithKey ngramsElementFromRepo
1211 t2 <- getTime'
1212 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1213 . setScores (not scoresNeeded)
1214 . selectAndPaginate
1215 t3 <- getTime'
1216 liftBase $ hprint stderr
1217 ("getTableNgrams total=" % timeSpecs
1218 % " map1=" % timeSpecs
1219 % " map2=" % timeSpecs
1220 % " map3=" % timeSpecs
1221 % " sql=" % (if scoresNeeded then "map2" else "map3")
1222 % "\n"
1223 ) t0 t3 t0 t1 t1 t2 t2 t3
1224 pure tableMap3
1225
1226
1227 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1228 scoresRecomputeTableNgrams nId tabType listId = do
1229 tableMap <- getNgramsTableMap listId ngramsType
1230 _ <- tableMap & v_data %%~ setScores
1231 . Map.mapWithKey ngramsElementFromRepo
1232
1233 pure $ 1
1234 where
1235 ngramsType = ngramsTypeFromTabType tabType
1236
1237 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1238 setScores table = do
1239 let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
1240 occurrences <- getOccByNgramsOnlyFast' nId
1241 listId
1242 ngramsType
1243 ngrams_terms
1244 let
1245 setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
1246
1247 pure $ table & each %~ setOcc
1248
1249
1250
1251 -- APIs
1252
1253 -- TODO: find a better place for the code above, All APIs stay here
1254 type QueryParamR = QueryParam' '[Required, Strict]
1255
1256 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1257 deriving (Generic, Enum, Bounded, Read, Show)
1258
1259 instance FromHttpApiData OrderBy
1260 where
1261 parseUrlPiece "TermAsc" = pure TermAsc
1262 parseUrlPiece "TermDesc" = pure TermDesc
1263 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1264 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1265 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1266
1267
1268 instance ToParamSchema OrderBy
1269 instance FromJSON OrderBy
1270 instance ToJSON OrderBy
1271 instance ToSchema OrderBy
1272 instance Arbitrary OrderBy
1273 where
1274 arbitrary = elements [minBound..maxBound]
1275
1276 needsScores :: Maybe OrderBy -> Bool
1277 needsScores (Just ScoreAsc) = True
1278 needsScores (Just ScoreDesc) = True
1279 needsScores _ = False
1280
1281 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1282 :> QueryParamR "ngramsType" TabType
1283 :> QueryParamR "list" ListId
1284 :> QueryParamR "limit" Limit
1285 :> QueryParam "offset" Offset
1286 :> QueryParam "listType" ListType
1287 :> QueryParam "minTermSize" MinSize
1288 :> QueryParam "maxTermSize" MaxSize
1289 :> QueryParam "orderBy" OrderBy
1290 :> QueryParam "search" Text
1291 :> Get '[JSON] (Versioned NgramsTable)
1292
1293 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1294 :> QueryParamR "ngramsType" TabType
1295 :> QueryParamR "list" ListId
1296 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1297 :> Put '[JSON] (Versioned NgramsTablePatch)
1298
1299 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1300 :> QueryParamR "ngramsType" TabType
1301 :> QueryParamR "list" ListId
1302 :> "recompute" :> Post '[JSON] Int
1303
1304 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1305 :> QueryParamR "ngramsType" TabType
1306 :> QueryParamR "list" ListId
1307 :> Get '[JSON] Version
1308
1309 type TableNgramsApi = TableNgramsApiGet
1310 :<|> TableNgramsApiPut
1311 :<|> RecomputeScoresNgramsApiGet
1312 :<|> "version" :> TableNgramsApiGetVersion
1313
1314 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1315 => NodeId
1316 -> TabType
1317 -> ListId
1318 -> Limit
1319 -> Maybe Offset
1320 -> Maybe ListType
1321 -> Maybe MinSize -> Maybe MaxSize
1322 -> Maybe OrderBy
1323 -> Maybe Text -- full text search
1324 -> m (Versioned NgramsTable)
1325 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1326 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1327 where
1328 searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
1329
1330 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1331 => NodeId
1332 -> TabType
1333 -> ListId
1334 -> m Version
1335 getTableNgramsVersion _nId _tabType _listId = currentVersion
1336 -- TODO: limit?
1337 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1338 -- This line above looks like a waste of computation to finally get only the version.
1339 -- See the comment about listNgramsChangedSince.
1340
1341
1342 -- | Text search is deactivated for now for ngrams by doc only
1343 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1344 => DocId -> TabType
1345 -> ListId -> Limit -> Maybe Offset
1346 -> Maybe ListType
1347 -> Maybe MinSize -> Maybe MaxSize
1348 -> Maybe OrderBy
1349 -> Maybe Text -- full text search
1350 -> m (Versioned NgramsTable)
1351 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1352 ns <- selectNodesWithUsername NodeList userMaster
1353 let ngramsType = ngramsTypeFromTabType tabType
1354 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1355 let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
1356 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1357
1358
1359
1360 apiNgramsTableCorpus :: ( RepoCmdM env err m
1361 , HasNodeError err
1362 , HasInvalidError err
1363 , HasConnectionPool env
1364 , HasConfig env
1365 )
1366 => NodeId -> ServerT TableNgramsApi m
1367 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1368 :<|> tableNgramsPut
1369 :<|> scoresRecomputeTableNgrams cId
1370 :<|> getTableNgramsVersion cId
1371
1372 apiNgramsTableDoc :: ( RepoCmdM env err m
1373 , HasNodeError err
1374 , HasInvalidError err
1375 , HasConnectionPool env
1376 , HasConfig env
1377 )
1378 => DocId -> ServerT TableNgramsApi m
1379 apiNgramsTableDoc dId = getTableNgramsDoc dId
1380 :<|> tableNgramsPut
1381 :<|> scoresRecomputeTableNgrams dId
1382 :<|> getTableNgramsVersion dId
1383 -- > index all the corpus accordingly (TODO AD)
1384
1385 -- Did the given list of ngrams changed since the given version?
1386 -- The returned value is versioned boolean value, meaning that one always retrieve the
1387 -- latest version.
1388 -- If the given version is negative then one simply receive the latest version and True.
1389 -- Using this function is more precise than simply comparing the latest version number
1390 -- with the local version number. Indeed there might be no change to this particular list
1391 -- and still the version number has changed because of other lists.
1392 --
1393 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
1394 -- * currentVersion: good computation, good bandwidth, bad precision.
1395 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
1396 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
1397 listNgramsChangedSince :: RepoCmdM env err m
1398 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1399 listNgramsChangedSince listId ngramsType version
1400 | version < 0 =
1401 Versioned <$> currentVersion <*> pure True
1402 | otherwise =
1403 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1404
1405 -- Instances
1406 instance Arbitrary NgramsRepoElement where
1407 arbitrary = elements $ map ngramsElementToRepo ns
1408 where
1409 NgramsTable ns = mockTable
1410
1411 --{-
1412 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1413 where
1414 parseUrlPiece x = maybeToEither x (decode $ cs x)