]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Ngrams.hs
[FIX] #205 GraphExplorer recomputing (needs refactoring TODO)
[gargantext.git] / src / Gargantext / API / Ngrams.hs
1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
2 {-|
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
9 Portability : POSIX
10
11 Ngrams API
12
13 -- | TODO
14 get ngrams filtered by NgramsType
15 add get
16
17 -}
18
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
25
26 module Gargantext.API.Ngrams
27 ( TableNgramsApi
28 , TableNgramsApiGet
29 , TableNgramsApiPut
30
31 , getTableNgrams
32 , setListNgrams
33 --, rmListNgrams TODO fix before exporting
34 , putListNgrams
35 --, putListNgrams'
36 , apiNgramsTableCorpus
37 , apiNgramsTableDoc
38
39 , NgramsStatePatch
40 , NgramsTablePatch
41 , NgramsTableMap
42
43 , NgramsElement(..)
44 , mkNgramsElement
45 , mergeNgramsElement
46
47 , RootParent(..)
48
49 , MSet
50 , mSetFromList
51 , mSetToList
52
53 , Repo(..)
54 , r_version
55 , r_state
56 , r_history
57 , NgramsRepo
58 , NgramsRepoElement(..)
59 , saveRepo
60 , initRepo
61
62 , RepoEnv(..)
63 , renv_var
64 , renv_lock
65
66 , TabType(..)
67 , ngramsTypeFromTabType
68
69 , HasRepoVar(..)
70 , HasRepoSaver(..)
71 , HasRepo(..)
72 , RepoCmdM
73 , QueryParamR
74 , TODO
75
76 -- Internals
77 , getNgramsTableMap
78 , dumpJsonTableMap
79 , tableNgramsPull
80 , tableNgramsPut
81
82 , Version
83 , Versioned(..)
84 , currentVersion
85 , listNgramsChangedSince
86 )
87 where
88
89 import Codec.Serialise (Serialise())
90 import Control.Category ((>>>))
91 import Control.Concurrent
92 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
93 import Control.Monad.Base (MonadBase, liftBase)
94 import Control.Monad.Error.Class (MonadError)
95 import Control.Monad.Reader
96 import Control.Monad.State
97 import Control.Monad.Trans.Control (MonadBaseControl)
98 import Data.Aeson hiding ((.=))
99 import Data.Aeson.TH (deriveJSON)
100 import qualified Data.Aeson.Text as DAT
101 import Data.Either (Either(..))
102 import Data.Foldable
103 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
104 import qualified Data.List as List
105 import Data.Map.Strict (Map)
106 import qualified Data.Map.Strict as Map
107 import qualified Data.Map.Strict.Patch as PM
108 import Data.Maybe (fromMaybe)
109 import Data.Monoid
110 import Data.Ord (Down(..))
111 import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..),
112 PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours,
113 MaybePatch(Mod), unMod, old, new)
114 import Data.Set (Set)
115 import qualified Data.Set as S
116 import qualified Data.Set as Set
117 import Data.Swagger hiding (version, patch)
118 import Data.Text (Text, count, isInfixOf, unpack)
119 import Data.Text.Lazy.IO as DTL
120 import Data.Validity
121 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
122 import Formatting (hprint, int, (%))
123 import Formatting.Clock (timeSpecs)
124 import GHC.Generics (Generic)
125 import Servant hiding (Patch)
126 import System.Clock (getTime, TimeSpec, Clock(..))
127 import System.FileLock (FileLock)
128 import System.IO (stderr)
129 import Test.QuickCheck (elements, frequency)
130 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
131
132 import Prelude (error)
133 import Protolude (maybeToEither)
134 import Gargantext.Prelude
135
136 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
137 import Gargantext.Core.Types (TODO)
138 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
139 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
140 import Gargantext.Database.Query.Table.Node.Select
141 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
142 import Gargantext.Database.Admin.Config (userMaster)
143 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
144 import Gargantext.Database.Admin.Types.Node (NodeType(..))
145 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
146 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
147
148 ------------------------------------------------------------------------
149 --data FacetFormat = Table | Chart
150 data TabType = Docs | Trash | MoreFav | MoreTrash
151 | Terms | Sources | Authors | Institutes
152 | Contacts
153 deriving (Generic, Enum, Bounded, Show)
154
155 instance FromHttpApiData TabType
156 where
157 parseUrlPiece "Docs" = pure Docs
158 parseUrlPiece "Trash" = pure Trash
159 parseUrlPiece "MoreFav" = pure MoreFav
160 parseUrlPiece "MoreTrash" = pure MoreTrash
161
162 parseUrlPiece "Terms" = pure Terms
163 parseUrlPiece "Sources" = pure Sources
164 parseUrlPiece "Institutes" = pure Institutes
165 parseUrlPiece "Authors" = pure Authors
166
167 parseUrlPiece "Contacts" = pure Contacts
168
169 parseUrlPiece _ = Left "Unexpected value of TabType"
170
171 instance ToParamSchema TabType
172 instance ToJSON TabType
173 instance FromJSON TabType
174 instance ToSchema TabType
175 instance Arbitrary TabType
176 where
177 arbitrary = elements [minBound .. maxBound]
178
179 newtype MSet a = MSet (Map a ())
180 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
181
182 instance ToJSON a => ToJSON (MSet a) where
183 toJSON (MSet m) = toJSON (Map.keys m)
184 toEncoding (MSet m) = toEncoding (Map.keys m)
185
186 mSetFromSet :: Set a -> MSet a
187 mSetFromSet = MSet . Map.fromSet (const ())
188
189 mSetFromList :: Ord a => [a] -> MSet a
190 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
191
192 -- mSetToSet :: Ord a => MSet a -> Set a
193 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
194 mSetToSet :: Ord a => MSet a -> Set a
195 mSetToSet = Set.fromList . mSetToList
196
197 mSetToList :: MSet a -> [a]
198 mSetToList (MSet a) = Map.keys a
199
200 instance Foldable MSet where
201 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
202
203 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
204 parseJSON = fmap mSetFromList . parseJSON
205
206 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
207 -- TODO
208 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
209
210 ------------------------------------------------------------------------
211 type NgramsTerm = Text
212
213 data RootParent = RootParent
214 { _rp_root :: NgramsTerm
215 , _rp_parent :: NgramsTerm
216 }
217 deriving (Ord, Eq, Show, Generic)
218
219 deriveJSON (unPrefix "_rp_") ''RootParent
220 makeLenses ''RootParent
221
222 data NgramsRepoElement = NgramsRepoElement
223 { _nre_size :: Int
224 , _nre_list :: ListType
225 --, _nre_root_parent :: Maybe RootParent
226 , _nre_root :: Maybe NgramsTerm
227 , _nre_parent :: Maybe NgramsTerm
228 , _nre_children :: MSet NgramsTerm
229 }
230 deriving (Ord, Eq, Show, Generic)
231
232 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
233 -- TODO
234 -- if ngrams & not size => size
235 -- drop occurrences
236
237 makeLenses ''NgramsRepoElement
238
239 instance ToSchema NgramsRepoElement where
240 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
241
242 instance Serialise (MSet NgramsTerm)
243 instance Serialise NgramsRepoElement
244
245 data NgramsElement =
246 NgramsElement { _ne_ngrams :: NgramsTerm
247 , _ne_size :: Int
248 , _ne_list :: ListType
249 , _ne_occurrences :: Int
250 , _ne_root :: Maybe NgramsTerm
251 , _ne_parent :: Maybe NgramsTerm
252 , _ne_children :: MSet NgramsTerm
253 }
254 deriving (Ord, Eq, Show, Generic)
255
256 deriveJSON (unPrefix "_ne_") ''NgramsElement
257 makeLenses ''NgramsElement
258
259 mkNgramsElement :: NgramsTerm
260 -> ListType
261 -> Maybe RootParent
262 -> MSet NgramsTerm
263 -> NgramsElement
264 mkNgramsElement ngrams list rp children =
265 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
266 where
267 -- TODO review
268 size = 1 + count " " ngrams
269
270 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
271 newNgramsElement mayList ngrams =
272 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
273
274 instance ToSchema NgramsElement where
275 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
276 instance Arbitrary NgramsElement where
277 arbitrary = elements [newNgramsElement Nothing "sport"]
278
279 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
280 ngramsElementToRepo
281 (NgramsElement { _ne_size = s
282 , _ne_list = l
283 , _ne_root = r
284 , _ne_parent = p
285 , _ne_children = c
286 }) =
287 NgramsRepoElement
288 { _nre_size = s
289 , _nre_list = l
290 , _nre_parent = p
291 , _nre_root = r
292 , _nre_children = c
293 }
294
295 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
296 ngramsElementFromRepo
297 ngrams
298 (NgramsRepoElement
299 { _nre_size = s
300 , _nre_list = l
301 , _nre_parent = p
302 , _nre_root = r
303 , _nre_children = c
304 }) =
305 NgramsElement { _ne_size = s
306 , _ne_list = l
307 , _ne_root = r
308 , _ne_parent = p
309 , _ne_children = c
310 , _ne_ngrams = ngrams
311 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
312 {-
313 -- Here we could use 0 if we want to avoid any `panic`.
314 -- It will not happen using getTableNgrams if
315 -- getOccByNgramsOnly provides a count of occurrences for
316 -- all the ngrams given.
317 -}
318 }
319
320 ------------------------------------------------------------------------
321 newtype NgramsTable = NgramsTable [NgramsElement]
322 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
323
324 type NgramsList = NgramsTable
325
326 makePrisms ''NgramsTable
327
328 -- | Question: why these repetition of Type in this instance
329 -- may you document it please ?
330 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
331 each = _NgramsTable . each
332
333 -- TODO discuss
334 -- | TODO Check N and Weight
335 {-
336 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
337 toNgramsElement ns = map toNgramsElement' ns
338 where
339 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
340 where
341 p' = case p of
342 Nothing -> Nothing
343 Just x -> lookup x mapParent
344 c' = maybe mempty identity $ lookup t mapChildren
345 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
346
347 mapParent :: Map Int Text
348 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
349
350 mapChildren :: Map Text (Set Text)
351 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
352 $ Map.fromListWith (<>)
353 $ map (first fromJust)
354 $ filter (isJust . fst)
355 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
356 -}
357
358 mockTable :: NgramsTable
359 mockTable = NgramsTable
360 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
361 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
362 , mkNgramsElement "cats" StopTerm Nothing mempty
363 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
364 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
365 , mkNgramsElement "fox" MapTerm Nothing mempty
366 , mkNgramsElement "object" CandidateTerm Nothing mempty
367 , mkNgramsElement "nothing" StopTerm Nothing mempty
368 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
369 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
370 , mkNgramsElement "moon" CandidateTerm Nothing mempty
371 , mkNgramsElement "sky" StopTerm Nothing mempty
372 ]
373 where
374 rp n = Just $ RootParent n n
375
376 instance Arbitrary NgramsTable where
377 arbitrary = pure mockTable
378
379 instance ToSchema NgramsTable
380
381 ------------------------------------------------------------------------
382 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
383 ------------------------------------------------------------------------
384 -- On the Client side:
385 --data Action = InGroup NgramsId NgramsId
386 -- | OutGroup NgramsId NgramsId
387 -- | SetListType NgramsId ListType
388
389 data PatchSet a = PatchSet
390 { _rem :: Set a
391 , _add :: Set a
392 }
393 deriving (Eq, Ord, Show, Generic)
394
395 makeLenses ''PatchSet
396 makePrisms ''PatchSet
397
398 instance ToJSON a => ToJSON (PatchSet a) where
399 toJSON = genericToJSON $ unPrefix "_"
400 toEncoding = genericToEncoding $ unPrefix "_"
401
402 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
403 parseJSON = genericParseJSON $ unPrefix "_"
404
405 {-
406 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
407 arbitrary = PatchSet <$> arbitrary <*> arbitrary
408
409 type instance Patched (PatchSet a) = Set a
410
411 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
412 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
413
414 instance Ord a => Semigroup (PatchSet a) where
415 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
416 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
417 } -- TODO Review
418
419 instance Ord a => Monoid (PatchSet a) where
420 mempty = PatchSet mempty mempty
421
422 instance Ord a => Group (PatchSet a) where
423 invert (PatchSet r a) = PatchSet a r
424
425 instance Ord a => Composable (PatchSet a) where
426 composable _ _ = undefined
427
428 instance Ord a => Action (PatchSet a) (Set a) where
429 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
430
431 instance Applicable (PatchSet a) (Set a) where
432 applicable _ _ = mempty
433
434 instance Ord a => Validity (PatchSet a) where
435 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
436
437 instance Ord a => Transformable (PatchSet a) where
438 transformable = undefined
439
440 conflicts _p _q = undefined
441
442 transformWith conflict p q = undefined conflict p q
443
444 instance ToSchema a => ToSchema (PatchSet a)
445 -}
446
447 type AddRem = Replace (Maybe ())
448
449 instance Serialise AddRem
450
451 remPatch, addPatch :: AddRem
452 remPatch = replace (Just ()) Nothing
453 addPatch = replace Nothing (Just ())
454
455 isRem :: Replace (Maybe ()) -> Bool
456 isRem = (== remPatch)
457
458 type PatchMap = PM.PatchMap
459
460
461 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
462 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
463 Transformable, Composable)
464
465 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
466 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
467
468 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
469 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
470
471 -- TODO this breaks module abstraction
472 makePrisms ''PM.PatchMap
473
474 makePrisms ''PatchMSet
475
476 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
477 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
478 where
479 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
480 f = Map.partition isRem >>> both %~ Map.keysSet
481
482 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
483 g (rems, adds) = Map.fromSet (const remPatch) rems
484 <> Map.fromSet (const addPatch) adds
485
486 instance Ord a => Action (PatchMSet a) (MSet a) where
487 act (PatchMSet p) (MSet m) = MSet $ act p m
488
489 instance Ord a => Applicable (PatchMSet a) (MSet a) where
490 applicable (PatchMSet p) (MSet m) = applicable p m
491
492 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
493 toJSON = toJSON . view _PatchMSetIso
494 toEncoding = toEncoding . view _PatchMSetIso
495
496 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
497 parseJSON = fmap (_PatchMSetIso #) . parseJSON
498
499 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
500 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
501
502 instance ToSchema a => ToSchema (PatchMSet a) where
503 -- TODO
504 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
505
506 type instance Patched (PatchMSet a) = MSet a
507
508 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
509 arbitrary = uncurry replace <$> arbitrary
510 -- If they happen to be equal then the patch is Keep.
511
512 instance ToSchema a => ToSchema (Replace a) where
513 declareNamedSchema (_ :: Proxy (Replace a)) = do
514 -- TODO Keep constructor is not supported here.
515 aSchema <- declareSchemaRef (Proxy :: Proxy a)
516 return $ NamedSchema (Just "Replace") $ mempty
517 & type_ ?~ SwaggerObject
518 & properties .~
519 InsOrdHashMap.fromList
520 [ ("old", aSchema)
521 , ("new", aSchema)
522 ]
523 & required .~ [ "old", "new" ]
524
525 data NgramsPatch
526 = NgramsPatch { _patch_children :: PatchMSet NgramsTerm
527 , _patch_list :: Replace ListType -- TODO Map UserId ListType
528 }
529 | NgramsReplace { _patch_old :: Maybe NgramsRepoElement
530 , _patch_new :: Maybe NgramsRepoElement
531 }
532 deriving (Eq, Show, Generic)
533
534 -- The JSON encoding is untagged, this is OK since the field names are disjoints and thus the encoding is unambiguous.
535 -- TODO: the empty object should be accepted and treated as mempty.
536 deriveJSON (unPrefixUntagged "_") ''NgramsPatch
537 makeLenses ''NgramsPatch
538
539 -- TODO: This instance is simplified since we should either have the fields children and/or list
540 -- or the fields old and/or new.
541 instance ToSchema NgramsPatch where
542 declareNamedSchema _ = do
543 childrenSch <- declareSchemaRef (Proxy :: Proxy (PatchMSet NgramsTerm))
544 listSch <- declareSchemaRef (Proxy :: Proxy (Replace ListType))
545 nreSch <- declareSchemaRef (Proxy :: Proxy NgramsRepoElement)
546 return $ NamedSchema (Just "NgramsPatch") $ mempty
547 & type_ ?~ SwaggerObject
548 & properties .~
549 InsOrdHashMap.fromList
550 [ ("children", childrenSch)
551 , ("list", listSch)
552 , ("old", nreSch)
553 , ("new", nreSch)
554 ]
555
556 instance Arbitrary NgramsPatch where
557 arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
558 , (1, NgramsReplace <$> arbitrary <*> arbitrary)
559 ]
560
561 instance Serialise NgramsPatch
562 instance Serialise (Replace ListType)
563
564 instance Serialise ListType
565
566 type NgramsPatchIso =
567 MaybePatch NgramsRepoElement (PairPatch (PatchMSet NgramsTerm) (Replace ListType))
568
569 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
570 _NgramsPatch = iso unwrap wrap
571 where
572 unwrap (NgramsPatch c l) = Mod $ PairPatch (c, l)
573 unwrap (NgramsReplace o n) = replace o n
574 wrap x =
575 case unMod x of
576 Just (PairPatch (c, l)) -> NgramsPatch c l
577 Nothing -> NgramsReplace (x ^? old . _Just) (x ^? new . _Just)
578
579 instance Semigroup NgramsPatch where
580 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
581
582 instance Monoid NgramsPatch where
583 mempty = _NgramsPatch # mempty
584
585 instance Validity NgramsPatch where
586 validate p = p ^. _NgramsPatch . to validate
587
588 instance Transformable NgramsPatch where
589 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
590
591 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
592
593 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
594 where
595 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
596
597 type ConflictResolutionNgramsPatch =
598 ( ConflictResolutionReplace (Maybe NgramsRepoElement)
599 , ( ConflictResolutionPatchMSet NgramsTerm
600 , ConflictResolutionReplace ListType
601 )
602 , (Bool, Bool)
603 )
604 type instance ConflictResolution NgramsPatch =
605 ConflictResolutionNgramsPatch
606
607 type PatchedNgramsPatch = Maybe NgramsRepoElement
608 type instance Patched NgramsPatch = PatchedNgramsPatch
609
610 instance Applicable (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
611 applicable (PairPatch (c, l)) n = applicable c (n ^. nre_children) <> applicable l (n ^. nre_list)
612
613 instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
614 act (PairPatch (c, l)) = (nre_children %~ act c)
615 . (nre_list %~ act l)
616
617 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
618 applicable p = applicable (p ^. _NgramsPatch)
619
620 instance Action NgramsPatch (Maybe NgramsRepoElement) where
621 act p = act (p ^. _NgramsPatch)
622
623 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
624 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
625
626 instance Serialise NgramsTablePatch
627 instance Serialise (PatchMap NgramsTerm NgramsPatch)
628
629 instance FromField NgramsTablePatch
630 where
631 fromField = fromField'
632
633 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
634 where
635 fromField = fromField'
636
637 type instance ConflictResolution NgramsTablePatch =
638 NgramsTerm -> ConflictResolutionNgramsPatch
639
640 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
641 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
642 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
643
644 makePrisms ''NgramsTablePatch
645 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
646 instance ToSchema NgramsTablePatch
647
648 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
649 applicable p = applicable (p ^. _NgramsTablePatch)
650
651 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
652 act p =
653 fmap (execState (reParentNgramsTablePatch p)) .
654 act (p ^. _NgramsTablePatch)
655
656 instance Arbitrary NgramsTablePatch where
657 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
658
659 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
660 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
661 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
662
663 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
664
665 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
666 reRootChildren root ngram = do
667 nre <- use $ at ngram
668 forOf_ (_Just . nre_children . folded) nre $ \child -> do
669 at child . _Just . nre_root ?= root
670 reRootChildren root child
671
672 reParent :: Maybe RootParent -> ReParent NgramsTerm
673 reParent rp child = do
674 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
675 . (nre_root .~ (_rp_root <$> rp))
676 )
677 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
678
679 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
680 reParentAddRem rp child p =
681 reParent (if isRem p then Nothing else Just rp) child
682
683 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
684 reParentNgramsPatch parent ngramsPatch = do
685 root_of_parent <- use (at parent . _Just . nre_root)
686 let
687 root = fromMaybe parent root_of_parent
688 rp = RootParent { _rp_root = root, _rp_parent = parent }
689 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
690 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
691
692 reParentNgramsTablePatch :: ReParent NgramsTablePatch
693 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
694 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
695
696 ------------------------------------------------------------------------
697 ------------------------------------------------------------------------
698 type Version = Int
699
700 data Versioned a = Versioned
701 { _v_version :: Version
702 , _v_data :: a
703 }
704 deriving (Generic, Show, Eq)
705 deriveJSON (unPrefix "_v_") ''Versioned
706 makeLenses ''Versioned
707 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
708 declareNamedSchema = wellNamedSchema "_v_"
709 instance Arbitrary a => Arbitrary (Versioned a) where
710 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
711
712
713 {-
714 -- TODO sequences of modifications (Patchs)
715 type NgramsIdPatch = Patch NgramsId NgramsPatch
716
717 ngramsPatch :: Int -> NgramsPatch
718 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
719
720 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
721 toEdit n p = Edit n p
722 ngramsIdPatch :: Patch NgramsId NgramsPatch
723 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
724 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
725 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
726 ]
727
728 -- applyPatchBack :: Patch -> IO Patch
729 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
730 -}
731 ------------------------------------------------------------------------
732 ------------------------------------------------------------------------
733 ------------------------------------------------------------------------
734
735 {-
736 -- TODO: Replace.old is ignored which means that if the current list
737 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
738 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
739 -- However this should not happen in non conflicting situations.
740 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
741 mkListsUpdate nt patches =
742 [ (ngramsTypeId nt, ng, listTypeId lt)
743 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
744 , lt <- patch ^.. patch_list . new
745 ]
746
747 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
748 -> NgramsType
749 -> NgramsTablePatch
750 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
751 mkChildrenGroups addOrRem nt patches =
752 [ (ngramsTypeId nt, parent, child)
753 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
754 , child <- patch ^.. patch_children . to addOrRem . folded
755 ]
756 -}
757
758 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
759 ngramsTypeFromTabType tabType =
760 let lieu = "Garg.API.Ngrams: " :: Text in
761 case tabType of
762 Sources -> TableNgrams.Sources
763 Authors -> TableNgrams.Authors
764 Institutes -> TableNgrams.Institutes
765 Terms -> TableNgrams.NgramsTerms
766 _ -> panic $ lieu <> "No Ngrams for this tab"
767 -- TODO: This `panic` would disapear with custom NgramsType.
768
769 ------------------------------------------------------------------------
770 data Repo s p = Repo
771 { _r_version :: Version
772 , _r_state :: s
773 , _r_history :: [p]
774 -- first patch in the list is the most recent
775 }
776 deriving (Generic)
777
778 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
779 parseJSON = genericParseJSON $ unPrefix "_r_"
780
781 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
782 toJSON = genericToJSON $ unPrefix "_r_"
783 toEncoding = genericToEncoding $ unPrefix "_r_"
784
785 instance (Serialise s, Serialise p) => Serialise (Repo s p)
786
787 makeLenses ''Repo
788
789 initRepo :: Monoid s => Repo s p
790 initRepo = Repo 1 mempty []
791
792 type NgramsRepo = Repo NgramsState NgramsStatePatch
793 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
794 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
795
796 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
797 instance Serialise NgramsStatePatch
798
799 initMockRepo :: NgramsRepo
800 initMockRepo = Repo 1 s []
801 where
802 s = Map.singleton TableNgrams.NgramsTerms
803 $ Map.singleton 47254
804 $ Map.fromList
805 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
806
807 data RepoEnv = RepoEnv
808 { _renv_var :: !(MVar NgramsRepo)
809 , _renv_saver :: !(IO ())
810 , _renv_lock :: !FileLock
811 }
812 deriving (Generic)
813
814 makeLenses ''RepoEnv
815
816 class HasRepoVar env where
817 repoVar :: Getter env (MVar NgramsRepo)
818
819 instance HasRepoVar (MVar NgramsRepo) where
820 repoVar = identity
821
822 class HasRepoSaver env where
823 repoSaver :: Getter env (IO ())
824
825 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
826 repoEnv :: Getter env RepoEnv
827
828 instance HasRepo RepoEnv where
829 repoEnv = identity
830
831 instance HasRepoVar RepoEnv where
832 repoVar = renv_var
833
834 instance HasRepoSaver RepoEnv where
835 repoSaver = renv_saver
836
837 type RepoCmdM env err m =
838 ( MonadReader env m
839 , MonadError err m
840 , MonadBaseControl IO m
841 , HasRepo env
842 )
843 ------------------------------------------------------------------------
844
845 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
846 => m ()
847 saveRepo = liftBase =<< view repoSaver
848
849 listTypeConflictResolution :: ListType -> ListType -> ListType
850 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
851
852 ngramsStatePatchConflictResolution
853 :: TableNgrams.NgramsType
854 -> NodeId
855 -> NgramsTerm
856 -> ConflictResolutionNgramsPatch
857 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
858 = (ours, (const ours, ours), (False, False))
859 -- ^------^------- they mean that Mod has always priority.
860 --(True, False) <- would mean priority to the left (same as ours).
861
862 -- undefined {- TODO think this through -}, listTypeConflictResolution)
863
864 -- Current state:
865 -- Insertions are not considered as patches,
866 -- they do not extend history,
867 -- they do not bump version.
868 insertNewOnly :: a -> Maybe b -> a
869 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
870 -- TODO error handling
871
872 something :: Monoid a => Maybe a -> a
873 something Nothing = mempty
874 something (Just a) = a
875
876 {- unused
877 -- TODO refactor with putListNgrams
878 copyListNgrams :: RepoCmdM env err m
879 => NodeId -> NodeId -> NgramsType
880 -> m ()
881 copyListNgrams srcListId dstListId ngramsType = do
882 var <- view repoVar
883 liftBase $ modifyMVar_ var $
884 pure . (r_state . at ngramsType %~ (Just . f . something))
885 saveRepo
886 where
887 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
888 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
889
890 -- TODO refactor with putListNgrams
891 -- The list must be non-empty!
892 -- The added ngrams must be non-existent!
893 addListNgrams :: RepoCmdM env err m
894 => NodeId -> NgramsType
895 -> [NgramsElement] -> m ()
896 addListNgrams listId ngramsType nes = do
897 var <- view repoVar
898 liftBase $ modifyMVar_ var $
899 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
900 saveRepo
901 where
902 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
903 -}
904
905 -- UNSAFE
906 rmListNgrams :: RepoCmdM env err m
907 => ListId
908 -> TableNgrams.NgramsType
909 -> m ()
910 rmListNgrams l nt = setListNgrams l nt mempty
911
912 -- | TODO: incr the Version number
913 -- && should use patch
914 -- UNSAFE
915 setListNgrams :: RepoCmdM env err m
916 => NodeId
917 -> TableNgrams.NgramsType
918 -> Map NgramsTerm NgramsRepoElement
919 -> m ()
920 setListNgrams listId ngramsType ns = do
921 var <- view repoVar
922 liftBase $ modifyMVar_ var $
923 pure . ( r_state
924 . at ngramsType %~
925 (Just .
926 (at listId .~ ( Just ns))
927 . something
928 )
929 )
930 saveRepo
931
932 -- NOTE
933 -- This is no longer part of the API.
934 -- This function is maintained for its usage in Database.Action.Flow.List.
935 -- If the given list of ngrams elements contains ngrams already in
936 -- the repo, they will be ignored.
937 putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
938 => NodeId
939 -> TableNgrams.NgramsType
940 -> [NgramsElement] -> m ()
941 putListNgrams _ _ [] = pure ()
942 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
943 where
944 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
945
946 putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
947 => NodeId
948 -> TableNgrams.NgramsType
949 -> Map NgramsTerm NgramsRepoElement
950 -> m ()
951 putListNgrams' nodeId ngramsType ns = do
952 -- printDebug "[putLictNgrams'] nodeId" nodeId
953 -- printDebug "[putLictNgrams'] ngramsType" ngramsType
954 -- printDebug "[putListNgrams'] ns" ns
955
956 let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
957 (p0, p0_validity) = PM.singleton nodeId p1
958 (p, p_validity) = PM.singleton ngramsType p0
959 assertValid p0_validity
960 assertValid p_validity
961 {-
962 -- TODO
963 v <- currentVersion
964 q <- commitStatePatch (Versioned v p)
965 assert empty q
966 -- What if another commit comes in between?
967 -- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
968 -- The modifyMVar_ would test the patch with applicable first.
969 -- If valid the rest would be atomic and no merge is required.
970 -}
971 var <- view repoVar
972 liftBase $ modifyMVar_ var $ \r -> do
973 pure $ r & r_version +~ 1
974 & r_history %~ (p :)
975 & r_state . at ngramsType %~
976 (Just .
977 (at nodeId %~
978 ( Just
979 . (<> ns)
980 . something
981 )
982 )
983 . something
984 )
985 saveRepo
986
987
988 currentVersion :: RepoCmdM env err m
989 => m Version
990 currentVersion = do
991 var <- view repoVar
992 r <- liftBase $ readMVar var
993 pure $ r ^. r_version
994
995
996 -- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
997 commitStatePatch :: RepoCmdM env err m => Versioned NgramsStatePatch -> m (Versioned NgramsStatePatch)
998 commitStatePatch (Versioned p_version p) = do
999 var <- view repoVar
1000 vq' <- liftBase $ modifyMVar var $ \r -> do
1001 let
1002 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1003 (p', q') = transformWith ngramsStatePatchConflictResolution p q
1004 r' = r & r_version +~ 1
1005 & r_state %~ act p'
1006 & r_history %~ (p' :)
1007 {-
1008 -- Ideally we would like to check these properties. However:
1009 -- * They should be checked only to debug the code. The client data
1010 -- should be able to trigger these.
1011 -- * What kind of error should they throw (we are in IO here)?
1012 -- * Should we keep modifyMVar?
1013 -- * Should we throw the validation in an Exception, catch it around
1014 -- modifyMVar and throw it back as an Error?
1015 assertValid $ transformable p q
1016 assertValid $ applicable p' (r ^. r_state)
1017 -}
1018 pure (r', Versioned (r' ^. r_version) q')
1019
1020 saveRepo
1021 pure vq'
1022
1023 -- This is a special case of tableNgramsPut where the input patch is empty.
1024 tableNgramsPull :: RepoCmdM env err m
1025 => ListId
1026 -> TableNgrams.NgramsType
1027 -> Version
1028 -> m (Versioned NgramsTablePatch)
1029 tableNgramsPull listId ngramsType p_version = do
1030 var <- view repoVar
1031 r <- liftBase $ readMVar var
1032
1033 let
1034 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
1035 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
1036
1037 pure (Versioned (r ^. r_version) q_table)
1038
1039 -- Apply the given patch to the DB and returns the patch to be applied on the
1040 -- client.
1041 -- TODO-ACCESS check
1042 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
1043 => TabType -> ListId
1044 -> Versioned NgramsTablePatch
1045 -> m (Versioned NgramsTablePatch)
1046 tableNgramsPut tabType listId (Versioned p_version p_table)
1047 | p_table == mempty = do
1048 let ngramsType = ngramsTypeFromTabType tabType
1049 tableNgramsPull listId ngramsType p_version
1050
1051 | otherwise = do
1052 let ngramsType = ngramsTypeFromTabType tabType
1053 (p0, p0_validity) = PM.singleton listId p_table
1054 (p, p_validity) = PM.singleton ngramsType p0
1055
1056 assertValid p0_validity
1057 assertValid p_validity
1058
1059 commitStatePatch (Versioned p_version p)
1060 <&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
1061
1062 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1063 mergeNgramsElement _neOld neNew = neNew
1064 {-
1065 { _ne_list :: ListType
1066 If we merge the parents/children we can potentially create cycles!
1067 , _ne_parent :: Maybe NgramsTerm
1068 , _ne_children :: MSet NgramsTerm
1069 }
1070 -}
1071
1072 getNgramsTableMap :: RepoCmdM env err m
1073 => NodeId
1074 -> TableNgrams.NgramsType
1075 -> m (Versioned NgramsTableMap)
1076 getNgramsTableMap nodeId ngramsType = do
1077 v <- view repoVar
1078 repo <- liftBase $ readMVar v
1079 pure $ Versioned (repo ^. r_version)
1080 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1081
1082 dumpJsonTableMap :: RepoCmdM env err m
1083 => Text
1084 -> NodeId
1085 -> TableNgrams.NgramsType
1086 -> m ()
1087 dumpJsonTableMap fpath nodeId ngramsType = do
1088 m <- getNgramsTableMap nodeId ngramsType
1089 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
1090 pure ()
1091
1092 type MinSize = Int
1093 type MaxSize = Int
1094
1095 -- | TODO Errors management
1096 -- TODO: polymorphic for Annuaire or Corpus or ...
1097 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1098 -- TODO: should take only one ListId
1099
1100 getTime' :: MonadBase IO m => m TimeSpec
1101 getTime' = liftBase $ getTime ProcessCPUTime
1102
1103
1104 getTableNgrams :: forall env err m.
1105 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1106 => NodeType -> NodeId -> TabType
1107 -> ListId -> Limit -> Maybe Offset
1108 -> Maybe ListType
1109 -> Maybe MinSize -> Maybe MaxSize
1110 -> Maybe OrderBy
1111 -> (NgramsTerm -> Bool)
1112 -> m (Versioned NgramsTable)
1113 getTableNgrams _nType nId tabType listId limit_ offset
1114 listType minSize maxSize orderBy searchQuery = do
1115
1116 t0 <- getTime'
1117 -- lIds <- selectNodesWithUsername NodeList userMaster
1118 let
1119 ngramsType = ngramsTypeFromTabType tabType
1120 offset' = maybe 0 identity offset
1121 listType' = maybe (const True) (==) listType
1122 minSize' = maybe (const True) (<=) minSize
1123 maxSize' = maybe (const True) (>=) maxSize
1124
1125 selected_node n = minSize' s
1126 && maxSize' s
1127 && searchQuery (n ^. ne_ngrams)
1128 && listType' (n ^. ne_list)
1129 where
1130 s = n ^. ne_size
1131
1132 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1133
1134 ---------------------------------------
1135 sortOnOrder Nothing = identity
1136 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1137 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1138 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1139 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1140
1141 ---------------------------------------
1142 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1143 selectAndPaginate tableMap = roots <> inners
1144 where
1145 list = tableMap ^.. each
1146 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1147 (ne ^. ne_root)
1148 selected_nodes = list & take limit_
1149 . drop offset'
1150 . filter selected_node
1151 . sortOnOrder orderBy
1152 roots = rootOf <$> selected_nodes
1153 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1154 inners = list & filter (selected_inner rootsSet)
1155
1156 ---------------------------------------
1157 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1158 setScores False table = pure table
1159 setScores True table = do
1160 let ngrams_terms = (table ^.. each . ne_ngrams)
1161 t1 <- getTime'
1162 occurrences <- getOccByNgramsOnlyFast' nId
1163 listId
1164 ngramsType
1165 ngrams_terms
1166 t2 <- getTime'
1167 liftBase $ hprint stderr
1168 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1169 (length ngrams_terms) t1 t2
1170 {-
1171 occurrences <- getOccByNgramsOnlySlow nType nId
1172 (lIds <> [listId])
1173 ngramsType
1174 ngrams_terms
1175 -}
1176 let
1177 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1178
1179 pure $ table & each %~ setOcc
1180 ---------------------------------------
1181
1182 -- lists <- catMaybes <$> listsWith userMaster
1183 -- trace (show lists) $
1184 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1185
1186 let scoresNeeded = needsScores orderBy
1187 tableMap1 <- getNgramsTableMap listId ngramsType
1188 t1 <- getTime'
1189 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1190 . Map.mapWithKey ngramsElementFromRepo
1191 t2 <- getTime'
1192 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1193 . setScores (not scoresNeeded)
1194 . selectAndPaginate
1195 t3 <- getTime'
1196 liftBase $ hprint stderr
1197 ("getTableNgrams total=" % timeSpecs
1198 % " map1=" % timeSpecs
1199 % " map2=" % timeSpecs
1200 % " map3=" % timeSpecs
1201 % " sql=" % (if scoresNeeded then "map2" else "map3")
1202 % "\n"
1203 ) t0 t3 t0 t1 t1 t2 t2 t3
1204 pure tableMap3
1205
1206
1207 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1208 scoresRecomputeTableNgrams nId tabType listId = do
1209 tableMap <- getNgramsTableMap listId ngramsType
1210 _ <- tableMap & v_data %%~ setScores
1211 . Map.mapWithKey ngramsElementFromRepo
1212
1213 pure $ 1
1214 where
1215 ngramsType = ngramsTypeFromTabType tabType
1216
1217 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1218 setScores table = do
1219 let ngrams_terms = (table ^.. each . ne_ngrams)
1220 occurrences <- getOccByNgramsOnlyFast' nId
1221 listId
1222 ngramsType
1223 ngrams_terms
1224 let
1225 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1226
1227 pure $ table & each %~ setOcc
1228
1229
1230
1231 -- APIs
1232
1233 -- TODO: find a better place for the code above, All APIs stay here
1234 type QueryParamR = QueryParam' '[Required, Strict]
1235
1236 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1237 deriving (Generic, Enum, Bounded, Read, Show)
1238
1239 instance FromHttpApiData OrderBy
1240 where
1241 parseUrlPiece "TermAsc" = pure TermAsc
1242 parseUrlPiece "TermDesc" = pure TermDesc
1243 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1244 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1245 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1246
1247
1248 instance ToParamSchema OrderBy
1249 instance FromJSON OrderBy
1250 instance ToJSON OrderBy
1251 instance ToSchema OrderBy
1252 instance Arbitrary OrderBy
1253 where
1254 arbitrary = elements [minBound..maxBound]
1255
1256 needsScores :: Maybe OrderBy -> Bool
1257 needsScores (Just ScoreAsc) = True
1258 needsScores (Just ScoreDesc) = True
1259 needsScores _ = False
1260
1261 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1262 :> QueryParamR "ngramsType" TabType
1263 :> QueryParamR "list" ListId
1264 :> QueryParamR "limit" Limit
1265 :> QueryParam "offset" Offset
1266 :> QueryParam "listType" ListType
1267 :> QueryParam "minTermSize" MinSize
1268 :> QueryParam "maxTermSize" MaxSize
1269 :> QueryParam "orderBy" OrderBy
1270 :> QueryParam "search" Text
1271 :> Get '[JSON] (Versioned NgramsTable)
1272
1273 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1274 :> QueryParamR "ngramsType" TabType
1275 :> QueryParamR "list" ListId
1276 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1277 :> Put '[JSON] (Versioned NgramsTablePatch)
1278
1279 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1280 :> QueryParamR "ngramsType" TabType
1281 :> QueryParamR "list" ListId
1282 :> "recompute" :> Post '[JSON] Int
1283
1284 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1285 :> QueryParamR "ngramsType" TabType
1286 :> QueryParamR "list" ListId
1287 :> Get '[JSON] Version
1288
1289 type TableNgramsApi = TableNgramsApiGet
1290 :<|> TableNgramsApiPut
1291 :<|> RecomputeScoresNgramsApiGet
1292 :<|> "version" :> TableNgramsApiGetVersion
1293
1294 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1295 => NodeId
1296 -> TabType
1297 -> ListId
1298 -> Limit
1299 -> Maybe Offset
1300 -> Maybe ListType
1301 -> Maybe MinSize -> Maybe MaxSize
1302 -> Maybe OrderBy
1303 -> Maybe Text -- full text search
1304 -> m (Versioned NgramsTable)
1305 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1306 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1307 where
1308 searchQuery = maybe (const True) isInfixOf mt
1309
1310 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1311 => NodeId
1312 -> TabType
1313 -> ListId
1314 -> m Version
1315 getTableNgramsVersion _nId _tabType _listId = currentVersion
1316 -- TODO: limit?
1317 -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1318 -- This line above looks like a waste of computation to finally get only the version.
1319 -- See the comment about listNgramsChangedSince.
1320
1321
1322 -- | Text search is deactivated for now for ngrams by doc only
1323 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1324 => DocId -> TabType
1325 -> ListId -> Limit -> Maybe Offset
1326 -> Maybe ListType
1327 -> Maybe MinSize -> Maybe MaxSize
1328 -> Maybe OrderBy
1329 -> Maybe Text -- full text search
1330 -> m (Versioned NgramsTable)
1331 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1332 ns <- selectNodesWithUsername NodeList userMaster
1333 let ngramsType = ngramsTypeFromTabType tabType
1334 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1335 let searchQuery = flip S.member (S.fromList ngs)
1336 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1337
1338
1339
1340 apiNgramsTableCorpus :: ( RepoCmdM env err m
1341 , HasNodeError err
1342 , HasInvalidError err
1343 , HasConnectionPool env
1344 , HasConfig env
1345 )
1346 => NodeId -> ServerT TableNgramsApi m
1347 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1348 :<|> tableNgramsPut
1349 :<|> scoresRecomputeTableNgrams cId
1350 :<|> getTableNgramsVersion cId
1351
1352 apiNgramsTableDoc :: ( RepoCmdM env err m
1353 , HasNodeError err
1354 , HasInvalidError err
1355 , HasConnectionPool env
1356 , HasConfig env
1357 )
1358 => DocId -> ServerT TableNgramsApi m
1359 apiNgramsTableDoc dId = getTableNgramsDoc dId
1360 :<|> tableNgramsPut
1361 :<|> scoresRecomputeTableNgrams dId
1362 :<|> getTableNgramsVersion dId
1363 -- > index all the corpus accordingly (TODO AD)
1364
1365 -- Did the given list of ngrams changed since the given version?
1366 -- The returned value is versioned boolean value, meaning that one always retrieve the
1367 -- latest version.
1368 -- If the given version is negative then one simply receive the latest version and True.
1369 -- Using this function is more precise than simply comparing the latest version number
1370 -- with the local version number. Indeed there might be no change to this particular list
1371 -- and still the version number has changed because of other lists.
1372 --
1373 -- Here the added value is to make a compromise between precision, computation, and bandwidth:
1374 -- * currentVersion: good computation, good bandwidth, bad precision.
1375 -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
1376 -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
1377 listNgramsChangedSince :: RepoCmdM env err m
1378 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1379 listNgramsChangedSince listId ngramsType version
1380 | version < 0 =
1381 Versioned <$> currentVersion <*> pure True
1382 | otherwise =
1383 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1384
1385 -- Instances
1386 instance Arbitrary NgramsRepoElement where
1387 arbitrary = elements $ map ngramsElementToRepo ns
1388 where
1389 NgramsTable ns = mockTable
1390
1391 --{-
1392 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1393 where
1394 parseUrlPiece x = maybeToEither x (decode $ cs x)