1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE ScopedTypeVariables #-}
21 {-# LANGUAGE TemplateHaskell #-}
22 {-# LANGUAGE TypeOperators #-}
23 {-# LANGUAGE TypeFamilies #-}
24 {-# OPTIONS -fno-warn-orphans #-}
26 module Gargantext.API.Ngrams
38 , apiNgramsTableCorpus
60 , NgramsRepoElement(..)
69 , ngramsTypeFromTabType
86 , listNgramsChangedSince
90 import Codec.Serialise (Serialise())
91 import Control.Category ((>>>))
92 import Control.Concurrent
93 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
94 import Control.Monad.Base (MonadBase, liftBase)
95 import Control.Monad.Error.Class (MonadError)
96 import Control.Monad.Reader
97 import Control.Monad.State
98 import Control.Monad.Trans.Control (MonadBaseControl)
99 import Data.Aeson hiding ((.=))
100 import Data.Aeson.TH (deriveJSON)
101 import Data.Either(Either(Left))
102 import Data.Either.Extra (maybeToEither)
104 import Data.Map.Strict (Map)
105 import Data.Maybe (fromMaybe)
107 import Data.Ord (Down(..))
108 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours)
109 import Data.Set (Set)
110 import Data.Swagger hiding (version, patch)
111 import Data.Text (Text, isInfixOf, count)
113 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
114 import Formatting (hprint, int, (%))
115 import Formatting.Clock (timeSpecs)
116 import GHC.Generics (Generic)
117 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
118 import Gargantext.Core.Types (TODO)
119 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
120 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
121 import Gargantext.Database.Query.Table.Node.Select
122 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
123 import Gargantext.Database.Admin.Config (userMaster)
124 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
125 import Gargantext.Database.Admin.Types.Node (NodeType(..))
126 import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig)
127 import Gargantext.Prelude
128 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
129 import Servant hiding (Patch)
130 import System.Clock (getTime, TimeSpec, Clock(..))
131 import System.FileLock (FileLock)
132 import System.IO (stderr)
133 import Test.QuickCheck (elements)
134 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
135 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
136 import qualified Data.List as List
137 import qualified Data.Map.Strict as Map
138 import qualified Data.Map.Strict.Patch as PM
139 import qualified Data.Set as S
140 import qualified Data.Set as Set
141 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
143 ------------------------------------------------------------------------
144 --data FacetFormat = Table | Chart
145 data TabType = Docs | Trash | MoreFav | MoreTrash
146 | Terms | Sources | Authors | Institutes
148 deriving (Generic, Enum, Bounded, Show)
150 instance FromHttpApiData TabType
152 parseUrlPiece "Docs" = pure Docs
153 parseUrlPiece "Trash" = pure Trash
154 parseUrlPiece "MoreFav" = pure MoreFav
155 parseUrlPiece "MoreTrash" = pure MoreTrash
157 parseUrlPiece "Terms" = pure Terms
158 parseUrlPiece "Sources" = pure Sources
159 parseUrlPiece "Institutes" = pure Institutes
160 parseUrlPiece "Authors" = pure Authors
162 parseUrlPiece "Contacts" = pure Contacts
164 parseUrlPiece _ = Left "Unexpected value of TabType"
166 instance ToParamSchema TabType
167 instance ToJSON TabType
168 instance FromJSON TabType
169 instance ToSchema TabType
170 instance Arbitrary TabType
172 arbitrary = elements [minBound .. maxBound]
174 newtype MSet a = MSet (Map a ())
175 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
177 instance ToJSON a => ToJSON (MSet a) where
178 toJSON (MSet m) = toJSON (Map.keys m)
179 toEncoding (MSet m) = toEncoding (Map.keys m)
181 mSetFromSet :: Set a -> MSet a
182 mSetFromSet = MSet . Map.fromSet (const ())
184 mSetFromList :: Ord a => [a] -> MSet a
185 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
187 -- mSetToSet :: Ord a => MSet a -> Set a
188 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
189 mSetToSet :: Ord a => MSet a -> Set a
190 mSetToSet = Set.fromList . mSetToList
192 mSetToList :: MSet a -> [a]
193 mSetToList (MSet a) = Map.keys a
195 instance Foldable MSet where
196 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
198 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
199 parseJSON = fmap mSetFromList . parseJSON
201 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
203 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
205 ------------------------------------------------------------------------
206 type NgramsTerm = Text
208 data RootParent = RootParent
209 { _rp_root :: NgramsTerm
210 , _rp_parent :: NgramsTerm
212 deriving (Ord, Eq, Show, Generic)
214 deriveJSON (unPrefix "_rp_") ''RootParent
215 makeLenses ''RootParent
217 data NgramsRepoElement = NgramsRepoElement
219 , _nre_list :: ListType
220 --, _nre_root_parent :: Maybe RootParent
221 , _nre_root :: Maybe NgramsTerm
222 , _nre_parent :: Maybe NgramsTerm
223 , _nre_children :: MSet NgramsTerm
225 deriving (Ord, Eq, Show, Generic)
227 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
228 makeLenses ''NgramsRepoElement
230 instance ToSchema NgramsRepoElement where
231 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
233 instance Serialise (MSet NgramsTerm)
234 instance Serialise NgramsRepoElement
237 NgramsElement { _ne_ngrams :: NgramsTerm
239 , _ne_list :: ListType
240 , _ne_occurrences :: Int
241 , _ne_root :: Maybe NgramsTerm
242 , _ne_parent :: Maybe NgramsTerm
243 , _ne_children :: MSet NgramsTerm
245 deriving (Ord, Eq, Show, Generic)
247 deriveJSON (unPrefix "_ne_") ''NgramsElement
248 makeLenses ''NgramsElement
250 mkNgramsElement :: NgramsTerm
255 mkNgramsElement ngrams list rp children =
256 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
259 size = 1 + count " " ngrams
261 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
262 newNgramsElement mayList ngrams =
263 mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
265 instance ToSchema NgramsElement where
266 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
267 instance Arbitrary NgramsElement where
268 arbitrary = elements [newNgramsElement Nothing "sport"]
270 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
272 (NgramsElement { _ne_size = s
286 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
287 ngramsElementFromRepo
296 NgramsElement { _ne_size = s
301 , _ne_ngrams = ngrams
302 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
304 -- Here we could use 0 if we want to avoid any `panic`.
305 -- It will not happen using getTableNgrams if
306 -- getOccByNgramsOnly provides a count of occurrences for
307 -- all the ngrams given.
311 ------------------------------------------------------------------------
312 newtype NgramsTable = NgramsTable [NgramsElement]
313 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
315 type NgramsList = NgramsTable
317 makePrisms ''NgramsTable
319 -- | Question: why these repetition of Type in this instance
320 -- may you document it please ?
321 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
322 each = _NgramsTable . each
325 -- | TODO Check N and Weight
327 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
328 toNgramsElement ns = map toNgramsElement' ns
330 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
334 Just x -> lookup x mapParent
335 c' = maybe mempty identity $ lookup t mapChildren
336 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
338 mapParent :: Map Int Text
339 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
341 mapChildren :: Map Text (Set Text)
342 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
343 $ Map.fromListWith (<>)
344 $ map (first fromJust)
345 $ filter (isJust . fst)
346 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
349 mockTable :: NgramsTable
350 mockTable = NgramsTable
351 [ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
352 , mkNgramsElement "cat" MapTerm (rp "animal") mempty
353 , mkNgramsElement "cats" StopTerm Nothing mempty
354 , mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
355 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
356 , mkNgramsElement "fox" MapTerm Nothing mempty
357 , mkNgramsElement "object" CandidateTerm Nothing mempty
358 , mkNgramsElement "nothing" StopTerm Nothing mempty
359 , mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
360 , mkNgramsElement "flower" MapTerm (rp "organic") mempty
361 , mkNgramsElement "moon" CandidateTerm Nothing mempty
362 , mkNgramsElement "sky" StopTerm Nothing mempty
365 rp n = Just $ RootParent n n
367 instance Arbitrary NgramsTable where
368 arbitrary = pure mockTable
370 instance ToSchema NgramsTable
372 ------------------------------------------------------------------------
373 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
374 ------------------------------------------------------------------------
375 -- On the Client side:
376 --data Action = InGroup NgramsId NgramsId
377 -- | OutGroup NgramsId NgramsId
378 -- | SetListType NgramsId ListType
380 data PatchSet a = PatchSet
384 deriving (Eq, Ord, Show, Generic)
386 makeLenses ''PatchSet
387 makePrisms ''PatchSet
389 instance ToJSON a => ToJSON (PatchSet a) where
390 toJSON = genericToJSON $ unPrefix "_"
391 toEncoding = genericToEncoding $ unPrefix "_"
393 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
394 parseJSON = genericParseJSON $ unPrefix "_"
397 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
398 arbitrary = PatchSet <$> arbitrary <*> arbitrary
400 type instance Patched (PatchSet a) = Set a
402 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
403 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
405 instance Ord a => Semigroup (PatchSet a) where
406 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
407 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
410 instance Ord a => Monoid (PatchSet a) where
411 mempty = PatchSet mempty mempty
413 instance Ord a => Group (PatchSet a) where
414 invert (PatchSet r a) = PatchSet a r
416 instance Ord a => Composable (PatchSet a) where
417 composable _ _ = undefined
419 instance Ord a => Action (PatchSet a) (Set a) where
420 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
422 instance Applicable (PatchSet a) (Set a) where
423 applicable _ _ = mempty
425 instance Ord a => Validity (PatchSet a) where
426 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
428 instance Ord a => Transformable (PatchSet a) where
429 transformable = undefined
431 conflicts _p _q = undefined
433 transformWith conflict p q = undefined conflict p q
435 instance ToSchema a => ToSchema (PatchSet a)
438 type AddRem = Replace (Maybe ())
440 instance Serialise AddRem
442 remPatch, addPatch :: AddRem
443 remPatch = replace (Just ()) Nothing
444 addPatch = replace Nothing (Just ())
446 isRem :: Replace (Maybe ()) -> Bool
447 isRem = (== remPatch)
449 type PatchMap = PM.PatchMap
452 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
453 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
454 Transformable, Composable)
456 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
457 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
459 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
460 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
462 -- TODO this breaks module abstraction
463 makePrisms ''PM.PatchMap
465 makePrisms ''PatchMSet
467 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
468 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
470 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
471 f = Map.partition isRem >>> both %~ Map.keysSet
473 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
474 g (rems, adds) = Map.fromSet (const remPatch) rems
475 <> Map.fromSet (const addPatch) adds
477 instance Ord a => Action (PatchMSet a) (MSet a) where
478 act (PatchMSet p) (MSet m) = MSet $ act p m
480 instance Ord a => Applicable (PatchMSet a) (MSet a) where
481 applicable (PatchMSet p) (MSet m) = applicable p m
483 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
484 toJSON = toJSON . view _PatchMSetIso
485 toEncoding = toEncoding . view _PatchMSetIso
487 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
488 parseJSON = fmap (_PatchMSetIso #) . parseJSON
490 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
491 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
493 instance ToSchema a => ToSchema (PatchMSet a) where
495 declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
497 type instance Patched (PatchMSet a) = MSet a
499 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
500 arbitrary = uncurry replace <$> arbitrary
501 -- If they happen to be equal then the patch is Keep.
503 instance ToSchema a => ToSchema (Replace a) where
504 declareNamedSchema (_ :: Proxy (Replace a)) = do
505 -- TODO Keep constructor is not supported here.
506 aSchema <- declareSchemaRef (Proxy :: Proxy a)
507 return $ NamedSchema (Just "Replace") $ mempty
508 & type_ ?~ SwaggerObject
510 InsOrdHashMap.fromList
514 & required .~ [ "old", "new" ]
517 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
518 , _patch_list :: Replace ListType -- TODO Map UserId ListType
520 deriving (Eq, Show, Generic)
522 deriveJSON (unPrefix "_") ''NgramsPatch
523 makeLenses ''NgramsPatch
525 instance ToSchema NgramsPatch where
526 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
528 instance Arbitrary NgramsPatch where
529 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
531 instance Serialise NgramsPatch
532 instance Serialise (Replace ListType)
533 instance Serialise ListType
535 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
537 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
538 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
540 instance Semigroup NgramsPatch where
541 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
543 instance Monoid NgramsPatch where
544 mempty = _NgramsPatch # mempty
546 instance Validity NgramsPatch where
547 validate p = p ^. _NgramsPatch . to validate
549 instance Transformable NgramsPatch where
550 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
552 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
554 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
556 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
558 type ConflictResolutionNgramsPatch =
559 ( ConflictResolutionPatchMSet NgramsTerm
560 , ConflictResolutionReplace ListType
562 type instance ConflictResolution NgramsPatch =
563 ConflictResolutionNgramsPatch
565 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
566 -- ~ Patched NgramsPatchIso
567 type instance Patched NgramsPatch = PatchedNgramsPatch
569 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
570 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
571 applicable p (Just nre) =
572 applicable (p ^. patch_children) (nre ^. nre_children) <>
573 applicable (p ^. patch_list) (nre ^. nre_list)
575 instance Action NgramsPatch NgramsRepoElement where
576 act p = (nre_children %~ act (p ^. patch_children))
577 . (nre_list %~ act (p ^. patch_list))
579 instance Action NgramsPatch (Maybe NgramsRepoElement) where
582 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
583 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
585 instance Serialise NgramsTablePatch
586 instance Serialise (PatchMap NgramsTerm NgramsPatch)
588 instance FromField NgramsTablePatch
590 fromField = fromField'
592 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
594 fromField = fromField'
596 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
598 type instance ConflictResolution NgramsTablePatch =
599 NgramsTerm -> ConflictResolutionNgramsPatch
601 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
602 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
603 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
605 makePrisms ''NgramsTablePatch
606 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
607 instance ToSchema NgramsTablePatch
609 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
610 applicable p = applicable (p ^. _NgramsTablePatch)
612 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
614 fmap (execState (reParentNgramsTablePatch p)) .
615 act (p ^. _NgramsTablePatch)
617 instance Arbitrary NgramsTablePatch where
618 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
620 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
621 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
622 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
624 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
626 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
627 reRootChildren root ngram = do
628 nre <- use $ at ngram
629 forOf_ (_Just . nre_children . folded) nre $ \child -> do
630 at child . _Just . nre_root ?= root
631 reRootChildren root child
633 reParent :: Maybe RootParent -> ReParent NgramsTerm
634 reParent rp child = do
635 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
636 . (nre_root .~ (_rp_root <$> rp))
638 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
640 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
641 reParentAddRem rp child p =
642 reParent (if isRem p then Nothing else Just rp) child
644 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
645 reParentNgramsPatch parent ngramsPatch = do
646 root_of_parent <- use (at parent . _Just . nre_root)
648 root = fromMaybe parent root_of_parent
649 rp = RootParent { _rp_root = root, _rp_parent = parent }
650 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
651 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
653 reParentNgramsTablePatch :: ReParent NgramsTablePatch
654 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
655 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
657 ------------------------------------------------------------------------
658 ------------------------------------------------------------------------
661 data Versioned a = Versioned
662 { _v_version :: Version
665 deriving (Generic, Show, Eq)
666 deriveJSON (unPrefix "_v_") ''Versioned
667 makeLenses ''Versioned
668 instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
669 declareNamedSchema = wellNamedSchema "_v_"
670 instance Arbitrary a => Arbitrary (Versioned a) where
671 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
675 -- TODO sequences of modifications (Patchs)
676 type NgramsIdPatch = Patch NgramsId NgramsPatch
678 ngramsPatch :: Int -> NgramsPatch
679 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
681 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
682 toEdit n p = Edit n p
683 ngramsIdPatch :: Patch NgramsId NgramsPatch
684 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
685 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
686 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
689 -- applyPatchBack :: Patch -> IO Patch
690 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
692 ------------------------------------------------------------------------
693 ------------------------------------------------------------------------
694 ------------------------------------------------------------------------
697 -- TODO: Replace.old is ignored which means that if the current list
698 -- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
699 -- the list is going to be `StopTerm` while it should keep `MapTerm`.
700 -- However this should not happen in non conflicting situations.
701 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
702 mkListsUpdate nt patches =
703 [ (ngramsTypeId nt, ng, listTypeId lt)
704 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
705 , lt <- patch ^.. patch_list . new
708 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
711 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
712 mkChildrenGroups addOrRem nt patches =
713 [ (ngramsTypeId nt, parent, child)
714 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
715 , child <- patch ^.. patch_children . to addOrRem . folded
719 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
720 ngramsTypeFromTabType tabType =
721 let lieu = "Garg.API.Ngrams: " :: Text in
723 Sources -> TableNgrams.Sources
724 Authors -> TableNgrams.Authors
725 Institutes -> TableNgrams.Institutes
726 Terms -> TableNgrams.NgramsTerms
727 _ -> panic $ lieu <> "No Ngrams for this tab"
728 -- TODO: This `panic` would disapear with custom NgramsType.
730 ------------------------------------------------------------------------
732 { _r_version :: Version
735 -- first patch in the list is the most recent
739 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
740 parseJSON = genericParseJSON $ unPrefix "_r_"
742 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
743 toJSON = genericToJSON $ unPrefix "_r_"
744 toEncoding = genericToEncoding $ unPrefix "_r_"
746 instance (Serialise s, Serialise p) => Serialise (Repo s p)
750 initRepo :: Monoid s => Repo s p
751 initRepo = Repo 1 mempty []
753 type NgramsRepo = Repo NgramsState NgramsStatePatch
754 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
755 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
757 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
758 instance Serialise NgramsStatePatch
760 initMockRepo :: NgramsRepo
761 initMockRepo = Repo 1 s []
763 s = Map.singleton TableNgrams.NgramsTerms
764 $ Map.singleton 47254
766 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
768 data RepoEnv = RepoEnv
769 { _renv_var :: !(MVar NgramsRepo)
770 , _renv_saver :: !(IO ())
771 , _renv_lock :: !FileLock
777 class HasRepoVar env where
778 repoVar :: Getter env (MVar NgramsRepo)
780 instance HasRepoVar (MVar NgramsRepo) where
783 class HasRepoSaver env where
784 repoSaver :: Getter env (IO ())
786 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
787 repoEnv :: Getter env RepoEnv
789 instance HasRepo RepoEnv where
792 instance HasRepoVar RepoEnv where
795 instance HasRepoSaver RepoEnv where
796 repoSaver = renv_saver
798 type RepoCmdM env err m =
801 , MonadBaseControl IO m
804 ------------------------------------------------------------------------
806 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
808 saveRepo = liftBase =<< view repoSaver
810 listTypeConflictResolution :: ListType -> ListType -> ListType
811 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
813 ngramsStatePatchConflictResolution
814 :: TableNgrams.NgramsType
817 -> ConflictResolutionNgramsPatch
818 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
820 -- undefined {- TODO think this through -}, listTypeConflictResolution)
823 -- Insertions are not considered as patches,
824 -- they do not extend history,
825 -- they do not bump version.
826 insertNewOnly :: a -> Maybe b -> a
827 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
828 -- TODO error handling
830 something :: Monoid a => Maybe a -> a
831 something Nothing = mempty
832 something (Just a) = a
835 -- TODO refactor with putListNgrams
836 copyListNgrams :: RepoCmdM env err m
837 => NodeId -> NodeId -> NgramsType
839 copyListNgrams srcListId dstListId ngramsType = do
841 liftBase $ modifyMVar_ var $
842 pure . (r_state . at ngramsType %~ (Just . f . something))
845 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
846 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
848 -- TODO refactor with putListNgrams
849 -- The list must be non-empty!
850 -- The added ngrams must be non-existent!
851 addListNgrams :: RepoCmdM env err m
852 => NodeId -> NgramsType
853 -> [NgramsElement] -> m ()
854 addListNgrams listId ngramsType nes = do
856 liftBase $ modifyMVar_ var $
857 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
860 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
863 rmListNgrams :: RepoCmdM env err m
865 -> TableNgrams.NgramsType
867 rmListNgrams l nt = setListNgrams l nt mempty
869 -- | TODO: incr the Version number
870 -- && should use patch
871 setListNgrams :: RepoCmdM env err m
873 -> TableNgrams.NgramsType
874 -> Map NgramsTerm NgramsRepoElement
876 setListNgrams listId ngramsType ns = do
878 liftBase $ modifyMVar_ var $
882 (at listId .~ ( Just ns))
889 -- If the given list of ngrams elements contains ngrams already in
890 -- the repo, they will be ignored.
891 putListNgrams :: RepoCmdM env err m
893 -> TableNgrams.NgramsType
894 -> [NgramsElement] -> m ()
895 putListNgrams _ _ [] = pure ()
896 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
898 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
900 putListNgrams' :: RepoCmdM env err m
902 -> TableNgrams.NgramsType
903 -> Map NgramsTerm NgramsRepoElement
905 putListNgrams' listId ngramsType ns = do
906 -- printDebug "putListNgrams" (length nes)
908 liftBase $ modifyMVar_ var $
925 tableNgramsPost :: RepoCmdM env err m
929 -> [NgramsTerm] -> m ()
930 tableNgramsPost tabType listId mayList =
931 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
933 currentVersion :: RepoCmdM env err m
937 r <- liftBase $ readMVar var
938 pure $ r ^. r_version
940 tableNgramsPull :: RepoCmdM env err m
942 -> TableNgrams.NgramsType
944 -> m (Versioned NgramsTablePatch)
945 tableNgramsPull listId ngramsType p_version = do
947 r <- liftBase $ readMVar var
950 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
951 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
953 pure (Versioned (r ^. r_version) q_table)
955 -- Apply the given patch to the DB and returns the patch to be applied on the
958 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
960 -> Versioned NgramsTablePatch
961 -> m (Versioned NgramsTablePatch)
962 tableNgramsPut tabType listId (Versioned p_version p_table)
963 | p_table == mempty = do
964 let ngramsType = ngramsTypeFromTabType tabType
965 tableNgramsPull listId ngramsType p_version
968 let ngramsType = ngramsTypeFromTabType tabType
969 (p0, p0_validity) = PM.singleton listId p_table
970 (p, p_validity) = PM.singleton ngramsType p0
972 assertValid p0_validity
973 assertValid p_validity
976 vq' <- liftBase $ modifyMVar var $ \r -> do
978 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
979 (p', q') = transformWith ngramsStatePatchConflictResolution p q
980 r' = r & r_version +~ 1
982 & r_history %~ (p' :)
983 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
985 -- Ideally we would like to check these properties. However:
986 -- * They should be checked only to debug the code. The client data
987 -- should be able to trigger these.
988 -- * What kind of error should they throw (we are in IO here)?
989 -- * Should we keep modifyMVar?
990 -- * Should we throw the validation in an Exception, catch it around
991 -- modifyMVar and throw it back as an Error?
992 assertValid $ transformable p q
993 assertValid $ applicable p' (r ^. r_state)
995 pure (r', Versioned (r' ^. r_version) q'_table)
1000 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1001 mergeNgramsElement _neOld neNew = neNew
1003 { _ne_list :: ListType
1004 If we merge the parents/children we can potentially create cycles!
1005 , _ne_parent :: Maybe NgramsTerm
1006 , _ne_children :: MSet NgramsTerm
1010 getNgramsTableMap :: RepoCmdM env err m
1012 -> TableNgrams.NgramsType
1013 -> m (Versioned NgramsTableMap)
1014 getNgramsTableMap nodeId ngramsType = do
1016 repo <- liftBase $ readMVar v
1017 pure $ Versioned (repo ^. r_version)
1018 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1023 -- | TODO Errors management
1024 -- TODO: polymorphic for Annuaire or Corpus or ...
1025 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1026 -- TODO: should take only one ListId
1028 getTime' :: MonadBase IO m => m TimeSpec
1029 getTime' = liftBase $ getTime ProcessCPUTime
1032 getTableNgrams :: forall env err m.
1033 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1034 => NodeType -> NodeId -> TabType
1035 -> ListId -> Limit -> Maybe Offset
1037 -> Maybe MinSize -> Maybe MaxSize
1039 -> (NgramsTerm -> Bool)
1040 -> m (Versioned NgramsTable)
1041 getTableNgrams _nType nId tabType listId limit_ offset
1042 listType minSize maxSize orderBy searchQuery = do
1045 -- lIds <- selectNodesWithUsername NodeList userMaster
1047 ngramsType = ngramsTypeFromTabType tabType
1048 offset' = maybe 0 identity offset
1049 listType' = maybe (const True) (==) listType
1050 minSize' = maybe (const True) (<=) minSize
1051 maxSize' = maybe (const True) (>=) maxSize
1053 selected_node n = minSize' s
1055 && searchQuery (n ^. ne_ngrams)
1056 && listType' (n ^. ne_list)
1060 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1062 ---------------------------------------
1063 sortOnOrder Nothing = identity
1064 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1065 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1066 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1067 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1069 ---------------------------------------
1070 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1071 selectAndPaginate tableMap = roots <> inners
1073 list = tableMap ^.. each
1074 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1076 selected_nodes = list & take limit_
1078 . filter selected_node
1079 . sortOnOrder orderBy
1080 roots = rootOf <$> selected_nodes
1081 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1082 inners = list & filter (selected_inner rootsSet)
1084 ---------------------------------------
1085 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1086 setScores False table = pure table
1087 setScores True table = do
1088 let ngrams_terms = (table ^.. each . ne_ngrams)
1090 occurrences <- getOccByNgramsOnlyFast' nId
1095 liftBase $ hprint stderr
1096 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1097 (length ngrams_terms) t1 t2
1099 occurrences <- getOccByNgramsOnlySlow nType nId
1105 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1107 pure $ table & each %~ setOcc
1108 ---------------------------------------
1110 -- lists <- catMaybes <$> listsWith userMaster
1111 -- trace (show lists) $
1112 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1114 let scoresNeeded = needsScores orderBy
1115 tableMap1 <- getNgramsTableMap listId ngramsType
1117 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1118 . Map.mapWithKey ngramsElementFromRepo
1120 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1121 . setScores (not scoresNeeded)
1124 liftBase $ hprint stderr
1125 ("getTableNgrams total=" % timeSpecs
1126 % " map1=" % timeSpecs
1127 % " map2=" % timeSpecs
1128 % " map3=" % timeSpecs
1129 % " sql=" % (if scoresNeeded then "map2" else "map3")
1131 ) t0 t3 t0 t1 t1 t2 t2 t3
1135 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env) => NodeId -> TabType -> ListId -> m Int
1136 scoresRecomputeTableNgrams nId tabType listId = do
1137 tableMap <- getNgramsTableMap listId ngramsType
1138 _ <- tableMap & v_data %%~ setScores
1139 . Map.mapWithKey ngramsElementFromRepo
1143 ngramsType = ngramsTypeFromTabType tabType
1145 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1146 setScores table = do
1147 let ngrams_terms = (table ^.. each . ne_ngrams)
1148 occurrences <- getOccByNgramsOnlyFast' nId
1153 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1155 pure $ table & each %~ setOcc
1161 -- TODO: find a better place for the code above, All APIs stay here
1162 type QueryParamR = QueryParam' '[Required, Strict]
1164 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1165 deriving (Generic, Enum, Bounded, Read, Show)
1167 instance FromHttpApiData OrderBy
1169 parseUrlPiece "TermAsc" = pure TermAsc
1170 parseUrlPiece "TermDesc" = pure TermDesc
1171 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1172 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1173 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1176 instance ToParamSchema OrderBy
1177 instance FromJSON OrderBy
1178 instance ToJSON OrderBy
1179 instance ToSchema OrderBy
1180 instance Arbitrary OrderBy
1182 arbitrary = elements [minBound..maxBound]
1184 needsScores :: Maybe OrderBy -> Bool
1185 needsScores (Just ScoreAsc) = True
1186 needsScores (Just ScoreDesc) = True
1187 needsScores _ = False
1189 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1190 :> QueryParamR "ngramsType" TabType
1191 :> QueryParamR "list" ListId
1192 :> QueryParamR "limit" Limit
1193 :> QueryParam "offset" Offset
1194 :> QueryParam "listType" ListType
1195 :> QueryParam "minTermSize" MinSize
1196 :> QueryParam "maxTermSize" MaxSize
1197 :> QueryParam "orderBy" OrderBy
1198 :> QueryParam "search" Text
1199 :> Get '[JSON] (Versioned NgramsTable)
1201 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1202 :> QueryParamR "ngramsType" TabType
1203 :> QueryParamR "list" ListId
1204 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1205 :> Put '[JSON] (Versioned NgramsTablePatch)
1207 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1208 :> QueryParamR "ngramsType" TabType
1209 :> QueryParamR "list" ListId
1210 :> QueryParam "listType" ListType
1211 :> ReqBody '[JSON] [NgramsTerm]
1214 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1215 :> QueryParamR "ngramsType" TabType
1216 :> QueryParamR "list" ListId
1217 :> "recompute" :> Post '[JSON] Int
1219 type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
1220 :> QueryParamR "ngramsType" TabType
1221 :> QueryParamR "list" ListId
1222 :> Get '[JSON] Version
1224 type TableNgramsApi = TableNgramsApiGet
1225 :<|> TableNgramsApiPut
1226 :<|> TableNgramsApiPost
1227 :<|> RecomputeScoresNgramsApiGet
1228 :<|> "version" :> TableNgramsApiGetVersion
1230 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1237 -> Maybe MinSize -> Maybe MaxSize
1239 -> Maybe Text -- full text search
1240 -> m (Versioned NgramsTable)
1241 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1242 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1244 searchQuery = maybe (const True) isInfixOf mt
1246 getTableNgramsVersion :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1251 getTableNgramsVersion nId tabType listId = do
1253 Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
1256 -- | Text search is deactivated for now for ngrams by doc only
1257 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
1259 -> ListId -> Limit -> Maybe Offset
1261 -> Maybe MinSize -> Maybe MaxSize
1263 -> Maybe Text -- full text search
1264 -> m (Versioned NgramsTable)
1265 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1266 ns <- selectNodesWithUsername NodeList userMaster
1267 let ngramsType = ngramsTypeFromTabType tabType
1268 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1269 let searchQuery = flip S.member (S.fromList ngs)
1270 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1274 apiNgramsTableCorpus :: ( RepoCmdM env err m
1276 , HasInvalidError err
1277 , HasConnectionPool env
1280 => NodeId -> ServerT TableNgramsApi m
1281 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1283 :<|> tableNgramsPost
1284 :<|> scoresRecomputeTableNgrams cId
1285 :<|> getTableNgramsVersion cId
1287 apiNgramsTableDoc :: ( RepoCmdM env err m
1289 , HasInvalidError err
1290 , HasConnectionPool env
1293 => DocId -> ServerT TableNgramsApi m
1294 apiNgramsTableDoc dId = getTableNgramsDoc dId
1296 :<|> tableNgramsPost
1297 :<|> scoresRecomputeTableNgrams dId
1298 :<|> getTableNgramsVersion dId
1299 -- > add new ngrams in database (TODO AD)
1300 -- > index all the corpus accordingly (TODO AD)
1302 listNgramsChangedSince :: RepoCmdM env err m
1303 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1304 listNgramsChangedSince listId ngramsType version
1306 Versioned <$> currentVersion <*> pure True
1308 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1311 instance Arbitrary NgramsRepoElement where
1312 arbitrary = elements $ map ngramsElementToRepo ns
1314 NgramsTable ns = mockTable
1317 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1319 parseUrlPiece x = maybeToEither x (decode $ cs x)