1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
26 module Gargantext.API.Ngrams
38 , apiNgramsTableCorpus
60 , NgramsRepoElement(..)
69 , ngramsTypeFromTabType
87 , listNgramsChangedSince
91 import Codec.Serialise (Serialise())
92 import Control.Category ((>>>))
93 import Control.Concurrent
94 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
95 import Control.Monad.Base (MonadBase, liftBase)
96 import Control.Monad.Error.Class (MonadError)
97 import Control.Monad.Reader
98 import Control.Monad.State
99 import Control.Monad.Trans.Control (MonadBaseControl)
100 import Data.Aeson hiding ((.=))
101 import Data.Aeson.TH (deriveJSON)
102 import qualified Data.Aeson.Text as DAT
103 import Data.Either (Either(Left))
105 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
106 import qualified Data.List as List
107 import Data.Map.Strict (Map)
108 import qualified Data.Map.Strict as Map
109 import qualified Data.Map.Strict.Patch as PM
110 import Data.Maybe (fromMaybe)
112 import Data.Ord (Down(..))
113 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours)
114 import Data.Set (Set)
115 import qualified Data.Set as S
116 import qualified Data.Set as Set
117 import Data.Swagger hiding (version, patch)
118 import Data.Text (Text, count, isInfixOf, unpack)
119 import Data.Text.Lazy.IO as DTL
121 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
122 import Formatting (hprint, int, (%))
123 import Formatting.Clock (timeSpecs)
124 import GHC.Generics (Generic)
125 import Servant hiding (Patch)
126 import System.Clock (getTime, TimeSpec, Clock(..))
127 import System.FileLock (FileLock)
128 import System.IO (stderr)
129 import Test.QuickCheck (elements)
130 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
132 import Prelude (error)
133 import Protolude (maybeToEither)
134 import Gargantext.Prelude
136 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
137 import Gargantext.Core.Types (TODO)
138 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
139 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
140 import Gargantext.Database.Query.Table.Node.Select
141 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
142 import Gargantext.Database.Admin.Config (userMaster)
143 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
144 import Gargantext.Database.Admin.Types.Node (NodeType(..))
145 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
146 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
148 ------------------------------------------------------------------------
149 --data FacetFormat = Table | Chart
150 data TabType = Docs | Trash | MoreFav | MoreTrash
151 | Terms | Sources | Authors | Institutes
153 deriving (Generic, Enum, Bounded, Show)
155 instance FromHttpApiData TabType
157 parseUrlPiece "Docs" = pure Docs
158 parseUrlPiece "Trash" = pure Trash
159 parseUrlPiece "MoreFav" = pure MoreFav
160 parseUrlPiece "MoreTrash" = pure MoreTrash
162 parseUrlPiece "Terms" = pure Terms
163 parseUrlPiece "Sources" = pure Sources
164 parseUrlPiece "Institutes" = pure Institutes
165 parseUrlPiece "Authors" = pure Authors
167 parseUrlPiece "Contacts" = pure Contacts
169 parseUrlPiece _ = Left "Unexpected value of TabType"
171 instance ToParamSchema TabType
172 instance ToJSON TabType
173 instance FromJSON TabType
174 instance ToSchema TabType
175 instance Arbitrary TabType
177 arbitrary = elements [minBound .. maxBound]
179 newtype MSet a = MSet (Map a ())
180 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
182 instance ToJSON a => ToJSON (MSet a) where
183 toJSON (MSet m) = toJSON (Map.keys m)
184 toEncoding (MSet m) = toEncoding (Map.keys m)
186 mSetFromSet :: Set a -> MSet a
187 mSetFromSet = MSet . Map.fromSet (const ())
189 mSetFromList :: Ord a => [a] -> MSet a
190 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
192 -- mSetToSet :: Ord a => MSet a -> Set a
193 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
194 mSetToSet :: Ord a => MSet a -> Set a
195 mSetToSet = Set.fromList . mSetToList
197 mSetToList :: MSet a -> [a]
198 mSetToList (MSet a) = Map.keys a
200 instance Foldable MSet where
201 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
203 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
204 parseJSON = fmap mSetFromList . parseJSON
206 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
208 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
210 ------------------------------------------------------------------------
211 type NgramsTerm = Text
213 data RootParent = RootParent
214 { _rp_root :: NgramsTerm
215 , _rp_parent :: NgramsTerm
217 deriving (Ord, Eq, Show, Generic)
219 deriveJSON (unPrefix "_rp_") ''RootParent
220 makeLenses ''RootParent
222 data NgramsRepoElement = NgramsRepoElement
224 , _nre_list :: ListType
225 --, _nre_root_parent :: Maybe RootParent
226 , _nre_root :: Maybe NgramsTerm
227 , _nre_parent :: Maybe NgramsTerm
228 , _nre_children :: MSet NgramsTerm
230 deriving (Ord, Eq, Show, Generic)
232 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
233 makeLenses ''NgramsRepoElement
235 instance ToSchema NgramsRepoElement where
236 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
238 instance Serialise (MSet NgramsTerm)
239 instance Serialise NgramsRepoElement
242 NgramsElement { _ne_ngrams :: NgramsTerm
244 , _ne_list :: ListType
245 , _ne_occurrences :: Int
246 , _ne_root :: Maybe NgramsTerm
247 , _ne_parent :: Maybe NgramsTerm
248 , _ne_children :: MSet NgramsTerm
250 deriving (Ord, Eq, Show, Generic)
252 deriveJSON (unPrefix "_ne_") ''NgramsElement
253 makeLenses ''NgramsElement
255 mkNgramsElement :: NgramsTerm
260 mkNgramsElement ngrams list rp children =
261 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
264 size = 1 + count " " ngrams
266 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
267 newNgramsElement mayList ngrams =
268 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
270 instance ToSchema NgramsElement where
271 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
272 instance Arbitrary NgramsElement where
273 arbitrary = elements [newNgramsElement Nothing "sport"]
275 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
277 (NgramsElement { _ne_size = s
291 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
292 ngramsElementFromRepo
301 NgramsElement { _ne_size = s
306 , _ne_ngrams = ngrams
307 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
309 -- Here we could use 0 if we want to avoid any `panic`.
310 -- It will not happen using getTableNgrams if
311 -- getOccByNgramsOnly provides a count of occurrences for
312 -- all the ngrams given.
316 ------------------------------------------------------------------------
317 newtype NgramsTable = NgramsTable [NgramsElement]
318 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
320 type NgramsList = NgramsTable
322 makePrisms ''NgramsTable
324 -- | Question: why these repetition of Type in this instance
325 -- may you document it please ?
326 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
327 each = _NgramsTable . each
330 -- | TODO Check N and Weight
332 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
333 toNgramsElement ns = map toNgramsElement' ns
335 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
339 Just x -> lookup x mapParent
340 c' = maybe mempty identity $ lookup t mapChildren
341 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
343 mapParent :: Map Int Text
344 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
346 mapChildren :: Map Text (Set Text)
347 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
348 $ Map.fromListWith (<>)
349 $ map (first fromJust)
350 $ filter (isJust . fst)
351 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
354 mockTable :: NgramsTable
355 mockTable = NgramsTable
356 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
357 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
358 , mkNgramsElement "cats" StopTerm Nothing mempty
359 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
360 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
361 , mkNgramsElement "fox" MapTerm Nothing mempty
362 , mkNgramsElement "object" CandidateTerm Nothing mempty
363 , mkNgramsElement "nothing" StopTerm Nothing mempty
364 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
365 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
366 , mkNgramsElement "moon" CandidateTerm Nothing mempty
367 , mkNgramsElement "sky" StopTerm Nothing mempty
370 rp n = Just $ RootParent n n
372 instance Arbitrary NgramsTable where
373 arbitrary = pure mockTable
375 instance ToSchema NgramsTable
377 ------------------------------------------------------------------------
378 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
379 ------------------------------------------------------------------------
380 -- On the Client side:
381 --data Action = InGroup NgramsId NgramsId
382 -- | OutGroup NgramsId NgramsId
383 -- | SetListType NgramsId ListType
385 data PatchSet a = PatchSet
389 deriving (Eq, Ord, Show, Generic)
391 makeLenses ''PatchSet
392 makePrisms ''PatchSet
394 instance ToJSON a => ToJSON (PatchSet a) where
395 toJSON = genericToJSON $ unPrefix "_"
396 toEncoding = genericToEncoding $ unPrefix "_"
398 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
399 parseJSON = genericParseJSON $ unPrefix "_"
402 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
403 arbitrary = PatchSet <$> arbitrary <*> arbitrary
405 type instance Patched (PatchSet a) = Set a
407 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
408 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
410 instance Ord a => Semigroup (PatchSet a) where
411 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
412 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
415 instance Ord a => Monoid (PatchSet a) where
416 mempty = PatchSet mempty mempty
418 instance Ord a => Group (PatchSet a) where
419 invert (PatchSet r a) = PatchSet a r
421 instance Ord a => Composable (PatchSet a) where
422 composable _ _ = undefined
424 instance Ord a => Action (PatchSet a) (Set a) where
425 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
427 instance Applicable (PatchSet a) (Set a) where
428 applicable _ _ = mempty
430 instance Ord a => Validity (PatchSet a) where
431 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
433 instance Ord a => Transformable (PatchSet a) where
434 transformable = undefined
436 conflicts _p _q = undefined
438 transformWith conflict p q = undefined conflict p q
440 instance ToSchema a => ToSchema (PatchSet a)
443 type AddRem = Replace (Maybe ())
445 instance Serialise AddRem
447 remPatch, addPatch :: AddRem
448 remPatch = replace (Just ()) Nothing
449 addPatch = replace Nothing (Just ())
451 isRem :: Replace (Maybe ()) -> Bool
452 isRem = (== remPatch)
454 type PatchMap = PM.PatchMap
457 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
458 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
459 Transformable, Composable)
461 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
462 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
464 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
465 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
467 -- TODO this breaks module abstraction
468 makePrisms ''PM.PatchMap
470 makePrisms ''PatchMSet
472 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
473 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
475 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
476 f = Map.partition isRem >>> both %~ Map.keysSet
478 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
479 g (rems, adds) = Map.fromSet (const remPatch) rems
480 <> Map.fromSet (const addPatch) adds
482 instance Ord a => Action (PatchMSet a) (MSet a) where
483 act (PatchMSet p) (MSet m) = MSet $ act p m
485 instance Ord a => Applicable (PatchMSet a) (MSet a) where
486 applicable (PatchMSet p) (MSet m) = applicable p m
488 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
489 toJSON = toJSON . view _PatchMSetIso
490 toEncoding = toEncoding . view _PatchMSetIso
492 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
493 parseJSON = fmap (_PatchMSetIso #) . parseJSON
495 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
496 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
498 instance ToSchema a => ToSchema (PatchMSet a) where
500 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
502 type instance Patched (PatchMSet a) = MSet a
504 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
505 arbitrary = uncurry replace <$> arbitrary
506 -- If they happen to be equal then the patch is Keep.
508 instance ToSchema a => ToSchema (Replace a) where
509 declareNamedSchema (_ :: Proxy (Replace a)) = do
510 -- TODO Keep constructor is not supported here.
511 aSchema <- declareSchemaRef (Proxy :: Proxy a)
512 return $ NamedSchema (Just "Replace") $ mempty
513 & type_ ?~ SwaggerObject
515 InsOrdHashMap.fromList
519 & required .~ [ "old", "new" ]
522 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
523 , _patch_list :: Replace ListType -- TODO Map UserId ListType
525 deriving (Eq, Show, Generic)
527 deriveJSON (unPrefix "_") ''NgramsPatch
528 makeLenses ''NgramsPatch
530 instance ToSchema NgramsPatch where
531 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
533 instance Arbitrary NgramsPatch where
534 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
536 instance Serialise NgramsPatch
537 instance Serialise (Replace ListType)
538 instance Serialise ListType
540 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
542 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
543 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
545 instance Semigroup NgramsPatch where
546 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
548 instance Monoid NgramsPatch where
549 mempty = _NgramsPatch # mempty
551 instance Validity NgramsPatch where
552 validate p = p ^. _NgramsPatch . to validate
554 instance Transformable NgramsPatch where
555 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
557 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
559 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
561 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
563 type ConflictResolutionNgramsPatch =
564 ( ConflictResolutionPatchMSet NgramsTerm
565 , ConflictResolutionReplace ListType
567 type instance ConflictResolution NgramsPatch =
568 ConflictResolutionNgramsPatch
570 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
571 -- ~ Patched NgramsPatchIso
572 type instance Patched NgramsPatch = PatchedNgramsPatch
574 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
575 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
576 applicable p (Just nre) =
577 applicable (p ^. patch_children) (nre ^. nre_children) <>
578 applicable (p ^. patch_list) (nre ^. nre_list)
580 instance Action NgramsPatch NgramsRepoElement where
581 act p = (nre_children %~ act (p ^. patch_children))
582 . (nre_list %~ act (p ^. patch_list))
584 instance Action NgramsPatch (Maybe NgramsRepoElement) where
587 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
588 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
590 instance Serialise NgramsTablePatch
591 instance Serialise (PatchMap NgramsTerm NgramsPatch)
593 instance FromField NgramsTablePatch
595 fromField = fromField'
597 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
599 fromField = fromField'
601 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
603 type instance ConflictResolution NgramsTablePatch =
604 NgramsTerm -> ConflictResolutionNgramsPatch
606 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
607 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
608 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
610 makePrisms ''NgramsTablePatch
611 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
612 instance ToSchema NgramsTablePatch
614 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
615 applicable p = applicable (p ^. _NgramsTablePatch)
617 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
619 fmap (execState (reParentNgramsTablePatch p)) .
620 act (p ^. _NgramsTablePatch)
622 instance Arbitrary NgramsTablePatch where
623 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
625 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
626 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
627 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
629 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
631 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
632 reRootChildren root ngram = do
633 nre <- use $ at ngram
634 forOf_ (_Just . nre_children . folded) nre $ \child -> do
635 at child . _Just . nre_root ?= root
636 reRootChildren root child
638 reParent :: Maybe RootParent -> ReParent NgramsTerm
639 reParent rp child = do
640 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
641 . (nre_root .~ (_rp_root <$> rp))
643 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
645 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
646 reParentAddRem rp child p =
647 reParent (if isRem p then Nothing else Just rp) child
649 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
650 reParentNgramsPatch parent ngramsPatch = do
651 root_of_parent <- use (at parent . _Just . nre_root)
653 root = fromMaybe parent root_of_parent
654 rp = RootParent { _rp_root = root, _rp_parent = parent }
655 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
656 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
658 reParentNgramsTablePatch :: ReParent NgramsTablePatch
659 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
660 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
662 ------------------------------------------------------------------------
663 ------------------------------------------------------------------------
666 data Versioned a = Versioned
667 { _v_version :: Version
670 deriving (Generic, Show, Eq)
671 deriveJSON (unPrefix "_v_") ''Versioned
672 makeLenses ''Versioned
673 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
674 declareNamedSchema = wellNamedSchema "_v_"
675 instance Arbitrary a => Arbitrary (Versioned a) where
676 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
680 -- TODO sequences of modifications (Patchs)
681 type NgramsIdPatch = Patch NgramsId NgramsPatch
683 ngramsPatch :: Int -> NgramsPatch
684 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
686 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
687 toEdit n p = Edit n p
688 ngramsIdPatch :: Patch NgramsId NgramsPatch
689 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
690 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
691 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
694 -- applyPatchBack :: Patch -> IO Patch
695 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
697 ------------------------------------------------------------------------
698 ------------------------------------------------------------------------
699 ------------------------------------------------------------------------
702 -- TODO: Replace.old is ignored which means that if the current list
703 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
704 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
705 -- However this should not happen in non conflicting situations.
706 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
707 mkListsUpdate nt patches =
708 [ (ngramsTypeId nt, ng, listTypeId lt)
709 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
710 , lt <- patch ^.. patch_list . new
713 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
716 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
717 mkChildrenGroups addOrRem nt patches =
718 [ (ngramsTypeId nt, parent, child)
719 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
720 , child <- patch ^.. patch_children . to addOrRem . folded
724 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
725 ngramsTypeFromTabType tabType =
726 let lieu = "Garg.API.Ngrams: " :: Text in
728 Sources -> TableNgrams.Sources
729 Authors -> TableNgrams.Authors
730 Institutes -> TableNgrams.Institutes
731 Terms -> TableNgrams.NgramsTerms
732 _ -> panic $ lieu <> "No Ngrams for this tab"
733 -- TODO: This `panic` would disapear with custom NgramsType.
735 ------------------------------------------------------------------------
737 { _r_version :: Version
740 -- first patch in the list is the most recent
744 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
745 parseJSON = genericParseJSON $ unPrefix "_r_"
747 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
748 toJSON = genericToJSON $ unPrefix "_r_"
749 toEncoding = genericToEncoding $ unPrefix "_r_"
751 instance (Serialise s, Serialise p) => Serialise (Repo s p)
755 initRepo :: Monoid s => Repo s p
756 initRepo = Repo 1 mempty []
758 type NgramsRepo = Repo NgramsState NgramsStatePatch
759 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
760 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
762 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
763 instance Serialise NgramsStatePatch
765 initMockRepo :: NgramsRepo
766 initMockRepo = Repo 1 s []
768 s = Map.singleton TableNgrams.NgramsTerms
769 $ Map.singleton 47254
771 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
773 data RepoEnv = RepoEnv
774 { _renv_var :: !(MVar NgramsRepo)
775 , _renv_saver :: !(IO ())
776 , _renv_lock :: !FileLock
782 class HasRepoVar env where
783 repoVar :: Getter env (MVar NgramsRepo)
785 instance HasRepoVar (MVar NgramsRepo) where
788 class HasRepoSaver env where
789 repoSaver :: Getter env (IO ())
791 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
792 repoEnv :: Getter env RepoEnv
794 instance HasRepo RepoEnv where
797 instance HasRepoVar RepoEnv where
800 instance HasRepoSaver RepoEnv where
801 repoSaver = renv_saver
803 type RepoCmdM env err m =
806 , MonadBaseControl IO m
809 ------------------------------------------------------------------------
811 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
813 saveRepo = liftBase =<< view repoSaver
815 listTypeConflictResolution :: ListType -> ListType -> ListType
816 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
818 ngramsStatePatchConflictResolution
819 :: TableNgrams.NgramsType
822 -> ConflictResolutionNgramsPatch
823 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
825 -- undefined {- TODO think this through -}, listTypeConflictResolution)
828 -- Insertions are not considered as patches,
829 -- they do not extend history,
830 -- they do not bump version.
831 insertNewOnly :: a -> Maybe b -> a
832 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
833 -- TODO error handling
835 something :: Monoid a => Maybe a -> a
836 something Nothing = mempty
837 something (Just a) = a
840 -- TODO refactor with putListNgrams
841 copyListNgrams :: RepoCmdM env err m
842 => NodeId -> NodeId -> NgramsType
844 copyListNgrams srcListId dstListId ngramsType = do
846 liftBase $ modifyMVar_ var $
847 pure . (r_state . at ngramsType %~ (Just . f . something))
850 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
851 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
853 -- TODO refactor with putListNgrams
854 -- The list must be non-empty!
855 -- The added ngrams must be non-existent!
856 addListNgrams :: RepoCmdM env err m
857 => NodeId -> NgramsType
858 -> [NgramsElement] -> m ()
859 addListNgrams listId ngramsType nes = do
861 liftBase $ modifyMVar_ var $
862 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
865 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
868 rmListNgrams :: RepoCmdM env err m
870 -> TableNgrams.NgramsType
872 rmListNgrams l nt = setListNgrams l nt mempty
874 -- | TODO: incr the Version number
875 -- && should use patch
876 setListNgrams :: RepoCmdM env err m
878 -> TableNgrams.NgramsType
879 -> Map NgramsTerm NgramsRepoElement
881 setListNgrams listId ngramsType ns = do
883 liftBase $ modifyMVar_ var $
887 (at listId .~ ( Just ns))
894 -- If the given list of ngrams elements contains ngrams already in
895 -- the repo, they will be ignored.
896 putListNgrams :: RepoCmdM env err m
898 -> TableNgrams.NgramsType
899 -> [NgramsElement] -> m ()
900 putListNgrams _ _ [] = pure ()
901 putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
903 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
905 putListNgrams' :: RepoCmdM env err m
907 -> TableNgrams.NgramsType
908 -> Map NgramsTerm NgramsRepoElement
910 putListNgrams' nodeId ngramsType ns = do
911 -- printDebug "[putLictNgrams'] nodeId" nodeId
912 -- printDebug "[putLictNgrams'] ngramsType" ngramsType
913 -- printDebug "[putListNgrams'] ns" ns
915 liftBase $ modifyMVar_ var $ \r -> do
916 pure $ r & r_version +~ 1
917 & r_history %~ (mempty :)
918 & r_state . at ngramsType %~
932 tableNgramsPost :: RepoCmdM env err m
936 -> [NgramsTerm] -> m ()
937 tableNgramsPost tabType nodeId mayList =
938 putListNgrams nodeId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
940 currentVersion :: RepoCmdM env err m
944 r <- liftBase $ readMVar var
945 pure $ r ^. r_version
947 tableNgramsPull :: RepoCmdM env err m
949 -> TableNgrams.NgramsType
951 -> m (Versioned NgramsTablePatch)
952 tableNgramsPull listId ngramsType p_version = do
954 r <- liftBase $ readMVar var
957 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
958 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
960 pure (Versioned (r ^. r_version) q_table)
962 -- Apply the given patch to the DB and returns the patch to be applied on the
965 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
967 -> Versioned NgramsTablePatch
968 -> m (Versioned NgramsTablePatch)
969 tableNgramsPut tabType listId (Versioned p_version p_table)
970 | p_table == mempty = do
971 let ngramsType = ngramsTypeFromTabType tabType
972 tableNgramsPull listId ngramsType p_version
975 let ngramsType = ngramsTypeFromTabType tabType
976 (p0, p0_validity) = PM.singleton listId p_table
977 (p, p_validity) = PM.singleton ngramsType p0
979 assertValid p0_validity
980 assertValid p_validity
983 vq' <- liftBase $ modifyMVar var $ \r -> do
985 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
986 (p', q') = transformWith ngramsStatePatchConflictResolution p q
987 r' = r & r_version +~ 1
989 & r_history %~ (p' :)
990 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
992 -- Ideally we would like to check these properties. However:
993 -- * They should be checked only to debug the code. The client data
994 -- should be able to trigger these.
995 -- * What kind of error should they throw (we are in IO here)?
996 -- * Should we keep modifyMVar?
997 -- * Should we throw the validation in an Exception, catch it around
998 -- modifyMVar and throw it back as an Error?
999 assertValid $ transformable p q
1000 assertValid $ applicable p' (r ^. r_state)
1002 pure (r', Versioned (r' ^. r_version) q'_table)
1007 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1008 mergeNgramsElement _neOld neNew = neNew
1010 { _ne_list :: ListType
1011 If we merge the parents/children we can potentially create cycles!
1012 , _ne_parent :: Maybe NgramsTerm
1013 , _ne_children :: MSet NgramsTerm
1017 getNgramsTableMap :: RepoCmdM env err m
1019 -> TableNgrams.NgramsType
1020 -> m (Versioned NgramsTableMap)
1021 getNgramsTableMap nodeId ngramsType = do
1023 repo <- liftBase $ readMVar v
1024 pure $ Versioned (repo ^. r_version)
1025 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1027 dumpJsonTableMap :: RepoCmdM env err m
1030 -> TableNgrams.NgramsType
1032 dumpJsonTableMap fpath nodeId ngramsType = do
1033 m <- getNgramsTableMap nodeId ngramsType
1034 liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
1040 -- | TODO Errors management
1041 -- TODO: polymorphic for Annuaire or Corpus or ...
1042 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1043 -- TODO: should take only one ListId
1045 getTime' :: MonadBase IO m => m TimeSpec
1046 getTime' = liftBase $ getTime ProcessCPUTime
1049 getTableNgrams :: forall env err m.
1050 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1051 => NodeType -> NodeId -> TabType
1052 -> ListId -> Limit -> Maybe Offset
1054 -> Maybe MinSize -> Maybe MaxSize
1056 -> (NgramsTerm -> Bool)
1057 -> m (Versioned NgramsTable)
1058 getTableNgrams _nType nId tabType listId limit_ offset
1059 listType minSize maxSize orderBy searchQuery = do
1062 -- lIds <- selectNodesWithUsername NodeList userMaster
1064 ngramsType = ngramsTypeFromTabType tabType
1065 offset' = maybe 0 identity offset
1066 listType' = maybe (const True) (==) listType
1067 minSize' = maybe (const True) (<=) minSize
1068 maxSize' = maybe (const True) (>=) maxSize
1070 selected_node n = minSize' s
1072 && searchQuery (n ^. ne_ngrams)
1073 && listType' (n ^. ne_list)
1077 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1079 ---------------------------------------
1080 sortOnOrder Nothing = identity
1081 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1082 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1083 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1084 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1086 ---------------------------------------
1087 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1088 selectAndPaginate tableMap = roots <> inners
1090 list = tableMap ^.. each
1091 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1093 selected_nodes = list & take limit_
1095 . filter selected_node
1096 . sortOnOrder orderBy
1097 roots = rootOf <$> selected_nodes
1098 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1099 inners = list & filter (selected_inner rootsSet)
1101 ---------------------------------------
1102 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1103 setScores False table = pure table
1104 setScores True table = do
1105 let ngrams_terms = (table ^.. each . ne_ngrams)
1107 occurrences <- getOccByNgramsOnlyFast' nId
1112 liftBase $ hprint stderr
1113 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1114 (length ngrams_terms) t1 t2
1116 occurrences <- getOccByNgramsOnlySlow nType nId
1122 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1124 pure $ table & each %~ setOcc
1125 ---------------------------------------
1127 -- lists <- catMaybes <$> listsWith userMaster
1128 -- trace (show lists) $
1129 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1131 let scoresNeeded = needsScores orderBy
1132 tableMap1 <- getNgramsTableMap listId ngramsType
1134 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1135 . Map.mapWithKey ngramsElementFromRepo
1137 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1138 . setScores (not scoresNeeded)
1141 liftBase $ hprint stderr
1142 ("getTableNgrams total=" % timeSpecs
1143 % " map1=" % timeSpecs
1144 % " map2=" % timeSpecs
1145 % " map3=" % timeSpecs
1146 % " sql=" % (if scoresNeeded then "map2" else "map3")
1148 ) t0 t3 t0 t1 t1 t2 t2 t3
1152 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1153 scoresRecomputeTableNgrams nId tabType listId = do
1154 tableMap <- getNgramsTableMap listId ngramsType
1155 _ <- tableMap & v_data %%~ setScores
1156 . Map.mapWithKey ngramsElementFromRepo
1160 ngramsType = ngramsTypeFromTabType tabType
1162 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1163 setScores table = do
1164 let ngrams_terms = (table ^.. each . ne_ngrams)
1165 occurrences <- getOccByNgramsOnlyFast' nId
1170 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1172 pure $ table & each %~ setOcc
1178 -- TODO: find a better place for the code above, All APIs stay here
1179 type QueryParamR = QueryParam' '[Required, Strict]
1181 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1182 deriving (Generic, Enum, Bounded, Read, Show)
1184 instance FromHttpApiData OrderBy
1186 parseUrlPiece "TermAsc" = pure TermAsc
1187 parseUrlPiece "TermDesc" = pure TermDesc
1188 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1189 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1190 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1193 instance ToParamSchema OrderBy
1194 instance FromJSON OrderBy
1195 instance ToJSON OrderBy
1196 instance ToSchema OrderBy
1197 instance Arbitrary OrderBy
1199 arbitrary = elements [minBound..maxBound]
1201 needsScores :: Maybe OrderBy -> Bool
1202 needsScores (Just ScoreAsc) = True
1203 needsScores (Just ScoreDesc) = True
1204 needsScores _ = False
1206 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1207 :> QueryParamR "ngramsType" TabType
1208 :> QueryParamR "list" ListId
1209 :> QueryParamR "limit" Limit
1210 :> QueryParam "offset" Offset
1211 :> QueryParam "listType" ListType
1212 :> QueryParam "minTermSize" MinSize
1213 :> QueryParam "maxTermSize" MaxSize
1214 :> QueryParam "orderBy" OrderBy
1215 :> QueryParam "search" Text
1216 :> Get '[JSON] (Versioned NgramsTable)
1218 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1219 :> QueryParamR "ngramsType" TabType
1220 :> QueryParamR "list" ListId
1221 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1222 :> Put '[JSON] (Versioned NgramsTablePatch)
1224 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1225 :> QueryParamR "ngramsType" TabType
1226 :> QueryParamR "list" ListId
1227 :> QueryParam "listType" ListType
1228 :> ReqBody '[JSON] [NgramsTerm]
1231 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1232 :> QueryParamR "ngramsType" TabType
1233 :> QueryParamR "list" ListId
1234 :> "recompute" :> Post '[JSON] Int
1236 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1237 :> QueryParamR "ngramsType" TabType
1238 :> QueryParamR "list" ListId
1239 :> Get '[JSON] Version
1241 type TableNgramsApi = TableNgramsApiGet
1242 :<|> TableNgramsApiPut
1243 :<|> TableNgramsApiPost
1244 :<|> RecomputeScoresNgramsApiGet
1245 :<|> "version" :> TableNgramsApiGetVersion
1247 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1254 -> Maybe MinSize -> Maybe MaxSize
1256 -> Maybe Text -- full text search
1257 -> m (Versioned NgramsTable)
1258 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1259 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1261 searchQuery = maybe (const True) isInfixOf mt
1263 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1268 getTableNgramsVersion nId tabType listId = do
1270 Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1273 -- | Text search is deactivated for now for ngrams by doc only
1274 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1276 -> ListId -> Limit -> Maybe Offset
1278 -> Maybe MinSize -> Maybe MaxSize
1280 -> Maybe Text -- full text search
1281 -> m (Versioned NgramsTable)
1282 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1283 ns <- selectNodesWithUsername NodeList userMaster
1284 let ngramsType = ngramsTypeFromTabType tabType
1285 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1286 let searchQuery = flip S.member (S.fromList ngs)
1287 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1291 apiNgramsTableCorpus :: ( RepoCmdM env err m
1293 , HasInvalidError err
1294 , HasConnectionPool env
1297 => NodeId -> ServerT TableNgramsApi m
1298 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1300 :<|> tableNgramsPost
1301 :<|> scoresRecomputeTableNgrams cId
1302 :<|> getTableNgramsVersion cId
1304 apiNgramsTableDoc :: ( RepoCmdM env err m
1306 , HasInvalidError err
1307 , HasConnectionPool env
1310 => DocId -> ServerT TableNgramsApi m
1311 apiNgramsTableDoc dId = getTableNgramsDoc dId
1313 :<|> tableNgramsPost
1314 :<|> scoresRecomputeTableNgrams dId
1315 :<|> getTableNgramsVersion dId
1316 -- > add new ngrams in database (TODO AD)
1317 -- > index all the corpus accordingly (TODO AD)
1319 listNgramsChangedSince :: RepoCmdM env err m
1320 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1321 listNgramsChangedSince listId ngramsType version
1323 Versioned <$> currentVersion <*> pure True
1325 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1328 instance Arbitrary NgramsRepoElement where
1329 arbitrary = elements $ map ngramsElementToRepo ns
1331 NgramsTable ns = mockTable
1334 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1336 parseUrlPiece x = maybeToEither x (decode $ cs x)