1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
47 , apiNgramsTableCorpus
69 , NgramsRepoElement(..)
78 , ngramsTypeFromTabType
95 , listNgramsChangedSince
99 import Control.Category ((>>>))
100 import Control.Concurrent
101 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
102 import Control.Monad.Base (MonadBase, liftBase)
103 import Control.Monad.Error.Class (MonadError)
104 import Control.Monad.Reader
105 import Control.Monad.State
106 import Control.Monad.Trans.Control (MonadBaseControl)
107 import Data.Aeson hiding ((.=))
108 import Data.Aeson.TH (deriveJSON)
109 import Data.Either(Either(Left))
110 import Data.Either.Extra (maybeToEither)
112 import Data.Map.Strict (Map)
113 import Data.Maybe (fromMaybe)
115 import Data.Ord (Down(..))
116 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours)
117 import Data.Set (Set)
118 import Data.Swagger hiding (version, patch)
119 import Data.Text (Text, isInfixOf, count)
121 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
122 import Formatting (hprint, int, (%))
123 import Formatting.Clock (timeSpecs)
124 import GHC.Generics (Generic)
125 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
126 import Gargantext.Core.Types (TODO)
127 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
128 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
129 import Gargantext.Database.Query.Table.Node.Select
130 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
131 import Gargantext.Database.Admin.Config (userMaster)
132 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
133 import Gargantext.Database.Admin.Types.Node (NodeType(..))
134 import Gargantext.Database.Prelude (fromField', HasConnectionPool)
135 import Gargantext.Prelude
136 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
137 import Servant hiding (Patch)
138 import System.Clock (getTime, TimeSpec, Clock(..))
139 import System.FileLock (FileLock)
140 import System.IO (stderr)
141 import Test.QuickCheck (elements)
142 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
143 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
144 import qualified Data.List as List
145 import qualified Data.Map.Strict as Map
146 import qualified Data.Map.Strict.Patch as PM
147 import qualified Data.Set as S
148 import qualified Data.Set as Set
149 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
151 ------------------------------------------------------------------------
152 --data FacetFormat = Table | Chart
153 data TabType = Docs | Trash | MoreFav | MoreTrash
154 | Terms | Sources | Authors | Institutes
156 deriving (Generic, Enum, Bounded, Show)
158 instance FromHttpApiData TabType
160 parseUrlPiece "Docs" = pure Docs
161 parseUrlPiece "Trash" = pure Trash
162 parseUrlPiece "MoreFav" = pure MoreFav
163 parseUrlPiece "MoreTrash" = pure MoreTrash
165 parseUrlPiece "Terms" = pure Terms
166 parseUrlPiece "Sources" = pure Sources
167 parseUrlPiece "Institutes" = pure Institutes
168 parseUrlPiece "Authors" = pure Authors
170 parseUrlPiece "Contacts" = pure Contacts
172 parseUrlPiece _ = Left "Unexpected value of TabType"
174 instance ToParamSchema TabType
175 instance ToJSON TabType
176 instance FromJSON TabType
177 instance ToSchema TabType
178 instance Arbitrary TabType
180 arbitrary = elements [minBound .. maxBound]
182 newtype MSet a = MSet (Map a ())
183 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
185 instance ToJSON a => ToJSON (MSet a) where
186 toJSON (MSet m) = toJSON (Map.keys m)
187 toEncoding (MSet m) = toEncoding (Map.keys m)
189 mSetFromSet :: Set a -> MSet a
190 mSetFromSet = MSet . Map.fromSet (const ())
192 mSetFromList :: Ord a => [a] -> MSet a
193 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
195 -- mSetToSet :: Ord a => MSet a -> Set a
196 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
197 mSetToSet :: Ord a => MSet a -> Set a
198 mSetToSet = Set.fromList . mSetToList
200 mSetToList :: MSet a -> [a]
201 mSetToList (MSet a) = Map.keys a
203 instance Foldable MSet where
204 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
206 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
207 parseJSON = fmap mSetFromList . parseJSON
209 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
211 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
213 ------------------------------------------------------------------------
214 type NgramsTerm = Text
216 data RootParent = RootParent
217 { _rp_root :: NgramsTerm
218 , _rp_parent :: NgramsTerm
220 deriving (Ord, Eq, Show, Generic)
222 deriveJSON (unPrefix "_rp_") ''RootParent
223 makeLenses ''RootParent
225 data NgramsRepoElement = NgramsRepoElement
227 , _nre_list :: ListType
228 --, _nre_root_parent :: Maybe RootParent
229 , _nre_root :: Maybe NgramsTerm
230 , _nre_parent :: Maybe NgramsTerm
231 , _nre_children :: MSet NgramsTerm
233 deriving (Ord, Eq, Show, Generic)
235 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
236 makeLenses ''NgramsRepoElement
238 instance ToSchema NgramsRepoElement where
239 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
243 NgramsElement { _ne_ngrams :: NgramsTerm
245 , _ne_list :: ListType
246 , _ne_occurrences :: Int
247 , _ne_root :: Maybe NgramsTerm
248 , _ne_parent :: Maybe NgramsTerm
249 , _ne_children :: MSet NgramsTerm
251 deriving (Ord, Eq, Show, Generic)
253 deriveJSON (unPrefix "_ne_") ''NgramsElement
254 makeLenses ''NgramsElement
256 mkNgramsElement :: NgramsTerm
261 mkNgramsElement ngrams list rp children =
262 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
265 size = 1 + count " " ngrams
267 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
268 newNgramsElement mayList ngrams =
269 mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
271 instance ToSchema NgramsElement where
272 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
273 instance Arbitrary NgramsElement where
274 arbitrary = elements [newNgramsElement Nothing "sport"]
276 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
278 (NgramsElement { _ne_size = s
292 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
293 ngramsElementFromRepo
302 NgramsElement { _ne_size = s
307 , _ne_ngrams = ngrams
308 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
310 -- Here we could use 0 if we want to avoid any `panic`.
311 -- It will not happen using getTableNgrams if
312 -- getOccByNgramsOnly provides a count of occurrences for
313 -- all the ngrams given.
317 ------------------------------------------------------------------------
318 newtype NgramsTable = NgramsTable [NgramsElement]
319 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
321 type NgramsList = NgramsTable
323 makePrisms ''NgramsTable
325 -- | Question: why these repetition of Type in this instance
326 -- may you document it please ?
327 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
328 each = _NgramsTable . each
331 -- | TODO Check N and Weight
333 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
334 toNgramsElement ns = map toNgramsElement' ns
336 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
340 Just x -> lookup x mapParent
341 c' = maybe mempty identity $ lookup t mapChildren
342 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
344 mapParent :: Map Int Text
345 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
347 mapChildren :: Map Text (Set Text)
348 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
349 $ Map.fromListWith (<>)
350 $ map (first fromJust)
351 $ filter (isJust . fst)
352 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
355 mockTable :: NgramsTable
356 mockTable = NgramsTable
357 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
358 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
359 , mkNgramsElement "cats" StopTerm Nothing mempty
360 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
361 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
362 , mkNgramsElement "fox" GraphTerm Nothing mempty
363 , mkNgramsElement "object" CandidateTerm Nothing mempty
364 , mkNgramsElement "nothing" StopTerm Nothing mempty
365 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
366 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
367 , mkNgramsElement "moon" CandidateTerm Nothing mempty
368 , mkNgramsElement "sky" StopTerm Nothing mempty
371 rp n = Just $ RootParent n n
373 instance Arbitrary NgramsTable where
374 arbitrary = pure mockTable
376 instance ToSchema NgramsTable
378 ------------------------------------------------------------------------
379 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
380 ------------------------------------------------------------------------
381 -- On the Client side:
382 --data Action = InGroup NgramsId NgramsId
383 -- | OutGroup NgramsId NgramsId
384 -- | SetListType NgramsId ListType
386 data PatchSet a = PatchSet
390 deriving (Eq, Ord, Show, Generic)
392 makeLenses ''PatchSet
393 makePrisms ''PatchSet
395 instance ToJSON a => ToJSON (PatchSet a) where
396 toJSON = genericToJSON $ unPrefix "_"
397 toEncoding = genericToEncoding $ unPrefix "_"
399 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
400 parseJSON = genericParseJSON $ unPrefix "_"
403 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
404 arbitrary = PatchSet <$> arbitrary <*> arbitrary
406 type instance Patched (PatchSet a) = Set a
408 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
409 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
411 instance Ord a => Semigroup (PatchSet a) where
412 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
413 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
416 instance Ord a => Monoid (PatchSet a) where
417 mempty = PatchSet mempty mempty
419 instance Ord a => Group (PatchSet a) where
420 invert (PatchSet r a) = PatchSet a r
422 instance Ord a => Composable (PatchSet a) where
423 composable _ _ = undefined
425 instance Ord a => Action (PatchSet a) (Set a) where
426 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
428 instance Applicable (PatchSet a) (Set a) where
429 applicable _ _ = mempty
431 instance Ord a => Validity (PatchSet a) where
432 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
434 instance Ord a => Transformable (PatchSet a) where
435 transformable = undefined
437 conflicts _p _q = undefined
439 transformWith conflict p q = undefined conflict p q
441 instance ToSchema a => ToSchema (PatchSet a)
444 type AddRem = Replace (Maybe ())
446 remPatch, addPatch :: AddRem
447 remPatch = replace (Just ()) Nothing
448 addPatch = replace Nothing (Just ())
450 isRem :: Replace (Maybe ()) -> Bool
451 isRem = (== remPatch)
453 type PatchMap = PM.PatchMap
455 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
456 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
457 Transformable, Composable)
459 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
460 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
462 -- TODO this breaks module abstraction
463 makePrisms ''PM.PatchMap
465 makePrisms ''PatchMSet
467 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
468 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
470 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
471 f = Map.partition isRem >>> both %~ Map.keysSet
473 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
474 g (rems, adds) = Map.fromSet (const remPatch) rems
475 <> Map.fromSet (const addPatch) adds
477 instance Ord a => Action (PatchMSet a) (MSet a) where
478 act (PatchMSet p) (MSet m) = MSet $ act p m
480 instance Ord a => Applicable (PatchMSet a) (MSet a) where
481 applicable (PatchMSet p) (MSet m) = applicable p m
483 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
484 toJSON = toJSON . view _PatchMSetIso
485 toEncoding = toEncoding . view _PatchMSetIso
487 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
488 parseJSON = fmap (_PatchMSetIso #) . parseJSON
490 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
491 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
493 instance ToSchema a => ToSchema (PatchMSet a) where
495 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
497 type instance Patched (PatchMSet a) = MSet a
499 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
500 arbitrary = uncurry replace <$> arbitrary
501 -- If they happen to be equal then the patch is Keep.
503 instance ToSchema a => ToSchema (Replace a) where
504 declareNamedSchema (_ :: Proxy (Replace a)) = do
505 -- TODO Keep constructor is not supported here.
506 aSchema <- declareSchemaRef (Proxy :: Proxy a)
507 return $ NamedSchema (Just "Replace") $ mempty
508 & type_ ?~ SwaggerObject
510 InsOrdHashMap.fromList
514 & required .~ [ "old", "new" ]
517 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
518 , _patch_list :: Replace ListType -- TODO Map UserId ListType
520 deriving (Eq, Show, Generic)
522 deriveJSON (unPrefix "_") ''NgramsPatch
523 makeLenses ''NgramsPatch
525 instance ToSchema NgramsPatch where
526 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
528 instance Arbitrary NgramsPatch where
529 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
531 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
533 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
534 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
536 instance Semigroup NgramsPatch where
537 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
539 instance Monoid NgramsPatch where
540 mempty = _NgramsPatch # mempty
542 instance Validity NgramsPatch where
543 validate p = p ^. _NgramsPatch . to validate
545 instance Transformable NgramsPatch where
546 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
548 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
550 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
552 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
554 type ConflictResolutionNgramsPatch =
555 ( ConflictResolutionPatchMSet NgramsTerm
556 , ConflictResolutionReplace ListType
558 type instance ConflictResolution NgramsPatch =
559 ConflictResolutionNgramsPatch
561 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
562 -- ~ Patched NgramsPatchIso
563 type instance Patched NgramsPatch = PatchedNgramsPatch
565 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
566 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
567 applicable p (Just nre) =
568 applicable (p ^. patch_children) (nre ^. nre_children) <>
569 applicable (p ^. patch_list) (nre ^. nre_list)
571 instance Action NgramsPatch NgramsRepoElement where
572 act p = (nre_children %~ act (p ^. patch_children))
573 . (nre_list %~ act (p ^. patch_list))
575 instance Action NgramsPatch (Maybe NgramsRepoElement) where
578 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
579 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
581 instance FromField NgramsTablePatch
583 fromField = fromField'
585 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
587 fromField = fromField'
589 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
591 type instance ConflictResolution NgramsTablePatch =
592 NgramsTerm -> ConflictResolutionNgramsPatch
594 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
595 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
596 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
598 makePrisms ''NgramsTablePatch
599 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
600 instance ToSchema NgramsTablePatch
602 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
603 applicable p = applicable (p ^. _NgramsTablePatch)
605 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
607 fmap (execState (reParentNgramsTablePatch p)) .
608 act (p ^. _NgramsTablePatch)
610 instance Arbitrary NgramsTablePatch where
611 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
613 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
614 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
615 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
617 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
619 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
620 reRootChildren root ngram = do
621 nre <- use $ at ngram
622 forOf_ (_Just . nre_children . folded) nre $ \child -> do
623 at child . _Just . nre_root ?= root
624 reRootChildren root child
626 reParent :: Maybe RootParent -> ReParent NgramsTerm
627 reParent rp child = do
628 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
629 . (nre_root .~ (_rp_root <$> rp))
631 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
633 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
634 reParentAddRem rp child p =
635 reParent (if isRem p then Nothing else Just rp) child
637 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
638 reParentNgramsPatch parent ngramsPatch = do
639 root_of_parent <- use (at parent . _Just . nre_root)
641 root = fromMaybe parent root_of_parent
642 rp = RootParent { _rp_root = root, _rp_parent = parent }
643 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
644 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
646 reParentNgramsTablePatch :: ReParent NgramsTablePatch
647 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
648 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
650 ------------------------------------------------------------------------
651 ------------------------------------------------------------------------
654 data Versioned a = Versioned
655 { _v_version :: Version
658 deriving (Generic, Show, Eq)
659 deriveJSON (unPrefix "_v_") ''Versioned
660 makeLenses ''Versioned
661 instance ToSchema a => ToSchema (Versioned a) where
662 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
663 instance Arbitrary a => Arbitrary (Versioned a) where
664 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
668 -- TODO sequences of modifications (Patchs)
669 type NgramsIdPatch = Patch NgramsId NgramsPatch
671 ngramsPatch :: Int -> NgramsPatch
672 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
674 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
675 toEdit n p = Edit n p
676 ngramsIdPatch :: Patch NgramsId NgramsPatch
677 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
678 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
679 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
682 -- applyPatchBack :: Patch -> IO Patch
683 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
685 ------------------------------------------------------------------------
686 ------------------------------------------------------------------------
687 ------------------------------------------------------------------------
690 -- TODO: Replace.old is ignored which means that if the current list
691 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
692 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
693 -- However this should not happen in non conflicting situations.
694 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
695 mkListsUpdate nt patches =
696 [ (ngramsTypeId nt, ng, listTypeId lt)
697 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
698 , lt <- patch ^.. patch_list . new
701 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
704 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
705 mkChildrenGroups addOrRem nt patches =
706 [ (ngramsTypeId nt, parent, child)
707 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
708 , child <- patch ^.. patch_children . to addOrRem . folded
712 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
713 ngramsTypeFromTabType tabType =
714 let lieu = "Garg.API.Ngrams: " :: Text in
716 Sources -> TableNgrams.Sources
717 Authors -> TableNgrams.Authors
718 Institutes -> TableNgrams.Institutes
719 Terms -> TableNgrams.NgramsTerms
720 _ -> panic $ lieu <> "No Ngrams for this tab"
721 -- TODO: This `panic` would disapear with custom NgramsType.
723 ------------------------------------------------------------------------
725 { _r_version :: Version
728 -- first patch in the list is the most recent
732 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
733 parseJSON = genericParseJSON $ unPrefix "_r_"
735 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
736 toJSON = genericToJSON $ unPrefix "_r_"
737 toEncoding = genericToEncoding $ unPrefix "_r_"
741 initRepo :: Monoid s => Repo s p
742 initRepo = Repo 1 mempty []
744 type NgramsRepo = Repo NgramsState NgramsStatePatch
745 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
746 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
748 initMockRepo :: NgramsRepo
749 initMockRepo = Repo 1 s []
751 s = Map.singleton TableNgrams.NgramsTerms
752 $ Map.singleton 47254
754 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
756 data RepoEnv = RepoEnv
757 { _renv_var :: !(MVar NgramsRepo)
758 , _renv_saver :: !(IO ())
759 , _renv_lock :: !FileLock
765 class HasRepoVar env where
766 repoVar :: Getter env (MVar NgramsRepo)
768 instance HasRepoVar (MVar NgramsRepo) where
771 class HasRepoSaver env where
772 repoSaver :: Getter env (IO ())
774 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
775 repoEnv :: Getter env RepoEnv
777 instance HasRepo RepoEnv where
780 instance HasRepoVar RepoEnv where
783 instance HasRepoSaver RepoEnv where
784 repoSaver = renv_saver
786 type RepoCmdM env err m =
789 , MonadBaseControl IO m
792 ------------------------------------------------------------------------
794 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
796 saveRepo = liftBase =<< view repoSaver
798 listTypeConflictResolution :: ListType -> ListType -> ListType
799 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
801 ngramsStatePatchConflictResolution
802 :: TableNgrams.NgramsType
805 -> ConflictResolutionNgramsPatch
806 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
808 -- undefined {- TODO think this through -}, listTypeConflictResolution)
811 -- Insertions are not considered as patches,
812 -- they do not extend history,
813 -- they do not bump version.
814 insertNewOnly :: a -> Maybe b -> a
815 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
816 -- TODO error handling
818 something :: Monoid a => Maybe a -> a
819 something Nothing = mempty
820 something (Just a) = a
823 -- TODO refactor with putListNgrams
824 copyListNgrams :: RepoCmdM env err m
825 => NodeId -> NodeId -> NgramsType
827 copyListNgrams srcListId dstListId ngramsType = do
829 liftBase $ modifyMVar_ var $
830 pure . (r_state . at ngramsType %~ (Just . f . something))
833 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
834 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
836 -- TODO refactor with putListNgrams
837 -- The list must be non-empty!
838 -- The added ngrams must be non-existent!
839 addListNgrams :: RepoCmdM env err m
840 => NodeId -> NgramsType
841 -> [NgramsElement] -> m ()
842 addListNgrams listId ngramsType nes = do
844 liftBase $ modifyMVar_ var $
845 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
848 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
851 rmListNgrams :: RepoCmdM env err m
853 -> TableNgrams.NgramsType
855 rmListNgrams l nt = setListNgrams l nt mempty
857 -- | TODO: incr the Version number
858 -- && should use patch
859 setListNgrams :: RepoCmdM env err m
861 -> TableNgrams.NgramsType
862 -> Map NgramsTerm NgramsRepoElement
864 setListNgrams listId ngramsType ns = do
866 liftBase $ modifyMVar_ var $
870 (at listId .~ ( Just ns))
877 -- If the given list of ngrams elements contains ngrams already in
878 -- the repo, they will be ignored.
879 putListNgrams :: RepoCmdM env err m
881 -> TableNgrams.NgramsType
882 -> [NgramsElement] -> m ()
883 putListNgrams _ _ [] = pure ()
884 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
886 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
888 putListNgrams' :: RepoCmdM env err m
890 -> TableNgrams.NgramsType
891 -> Map NgramsTerm NgramsRepoElement
893 putListNgrams' listId ngramsType ns = do
894 -- printDebug "putListNgrams" (length nes)
896 liftBase $ modifyMVar_ var $
913 tableNgramsPost :: RepoCmdM env err m
917 -> [NgramsTerm] -> m ()
918 tableNgramsPost tabType listId mayList =
919 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
921 currentVersion :: RepoCmdM env err m
925 r <- liftBase $ readMVar var
926 pure $ r ^. r_version
928 tableNgramsPull :: RepoCmdM env err m
930 -> TableNgrams.NgramsType
932 -> m (Versioned NgramsTablePatch)
933 tableNgramsPull listId ngramsType p_version = do
935 r <- liftBase $ readMVar var
938 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
939 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
941 pure (Versioned (r ^. r_version) q_table)
943 -- Apply the given patch to the DB and returns the patch to be applied on the
946 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
948 -> Versioned NgramsTablePatch
949 -> m (Versioned NgramsTablePatch)
950 tableNgramsPut tabType listId (Versioned p_version p_table)
951 | p_table == mempty = do
952 let ngramsType = ngramsTypeFromTabType tabType
953 tableNgramsPull listId ngramsType p_version
956 let ngramsType = ngramsTypeFromTabType tabType
957 (p0, p0_validity) = PM.singleton listId p_table
958 (p, p_validity) = PM.singleton ngramsType p0
960 assertValid p0_validity
961 assertValid p_validity
964 vq' <- liftBase $ modifyMVar var $ \r -> do
966 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
967 (p', q') = transformWith ngramsStatePatchConflictResolution p q
968 r' = r & r_version +~ 1
970 & r_history %~ (p' :)
971 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
973 -- Ideally we would like to check these properties. However:
974 -- * They should be checked only to debug the code. The client data
975 -- should be able to trigger these.
976 -- * What kind of error should they throw (we are in IO here)?
977 -- * Should we keep modifyMVar?
978 -- * Should we throw the validation in an Exception, catch it around
979 -- modifyMVar and throw it back as an Error?
980 assertValid $ transformable p q
981 assertValid $ applicable p' (r ^. r_state)
983 pure (r', Versioned (r' ^. r_version) q'_table)
988 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
989 mergeNgramsElement _neOld neNew = neNew
991 { _ne_list :: ListType
992 If we merge the parents/children we can potentially create cycles!
993 , _ne_parent :: Maybe NgramsTerm
994 , _ne_children :: MSet NgramsTerm
998 getNgramsTableMap :: RepoCmdM env err m
1000 -> TableNgrams.NgramsType
1001 -> m (Versioned NgramsTableMap)
1002 getNgramsTableMap nodeId ngramsType = do
1004 repo <- liftBase $ readMVar v
1005 pure $ Versioned (repo ^. r_version)
1006 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1011 -- | TODO Errors management
1012 -- TODO: polymorphic for Annuaire or Corpus or ...
1013 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1014 -- TODO: should take only one ListId
1016 getTime' :: MonadBase IO m => m TimeSpec
1017 getTime' = liftBase $ getTime ProcessCPUTime
1020 getTableNgrams :: forall env err m.
1021 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1022 => NodeType -> NodeId -> TabType
1023 -> ListId -> Limit -> Maybe Offset
1025 -> Maybe MinSize -> Maybe MaxSize
1027 -> (NgramsTerm -> Bool)
1028 -> m (Versioned NgramsTable)
1029 getTableNgrams _nType nId tabType listId limit_ offset
1030 listType minSize maxSize orderBy searchQuery = do
1033 -- lIds <- selectNodesWithUsername NodeList userMaster
1035 ngramsType = ngramsTypeFromTabType tabType
1036 offset' = maybe 0 identity offset
1037 listType' = maybe (const True) (==) listType
1038 minSize' = maybe (const True) (<=) minSize
1039 maxSize' = maybe (const True) (>=) maxSize
1041 selected_node n = minSize' s
1043 && searchQuery (n ^. ne_ngrams)
1044 && listType' (n ^. ne_list)
1048 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1050 ---------------------------------------
1051 sortOnOrder Nothing = identity
1052 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1053 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1054 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1055 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1057 ---------------------------------------
1058 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1059 selectAndPaginate tableMap = roots <> inners
1061 list = tableMap ^.. each
1062 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1064 selected_nodes = list & take limit_
1066 . filter selected_node
1067 . sortOnOrder orderBy
1068 roots = rootOf <$> selected_nodes
1069 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1070 inners = list & filter (selected_inner rootsSet)
1072 ---------------------------------------
1073 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1074 setScores False table = pure table
1075 setScores True table = do
1076 let ngrams_terms = (table ^.. each . ne_ngrams)
1078 occurrences <- getOccByNgramsOnlyFast' nId
1083 liftBase $ hprint stderr
1084 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1085 (length ngrams_terms) t1 t2
1087 occurrences <- getOccByNgramsOnlySlow nType nId
1093 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1095 pure $ table & each %~ setOcc
1096 ---------------------------------------
1098 -- lists <- catMaybes <$> listsWith userMaster
1099 -- trace (show lists) $
1100 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1102 let scoresNeeded = needsScores orderBy
1103 tableMap1 <- getNgramsTableMap listId ngramsType
1105 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1106 . Map.mapWithKey ngramsElementFromRepo
1108 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1109 . setScores (not scoresNeeded)
1112 liftBase $ hprint stderr
1113 ("getTableNgrams total=" % timeSpecs
1114 % " map1=" % timeSpecs
1115 % " map2=" % timeSpecs
1116 % " map3=" % timeSpecs
1117 % " sql=" % (if scoresNeeded then "map2" else "map3")
1119 ) t0 t3 t0 t1 t1 t2 t2 t3
1125 -- TODO: find a better place for the code above, All APIs stay here
1126 type QueryParamR = QueryParam' '[Required, Strict]
1128 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1129 deriving (Generic, Enum, Bounded, Read, Show)
1131 instance FromHttpApiData OrderBy
1133 parseUrlPiece "TermAsc" = pure TermAsc
1134 parseUrlPiece "TermDesc" = pure TermDesc
1135 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1136 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1137 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1140 instance ToParamSchema OrderBy
1141 instance FromJSON OrderBy
1142 instance ToJSON OrderBy
1143 instance ToSchema OrderBy
1144 instance Arbitrary OrderBy
1146 arbitrary = elements [minBound..maxBound]
1148 needsScores :: Maybe OrderBy -> Bool
1149 needsScores (Just ScoreAsc) = True
1150 needsScores (Just ScoreDesc) = True
1151 needsScores _ = False
1153 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1154 :> QueryParamR "ngramsType" TabType
1155 :> QueryParamR "list" ListId
1156 :> QueryParamR "limit" Limit
1157 :> QueryParam "offset" Offset
1158 :> QueryParam "listType" ListType
1159 :> QueryParam "minTermSize" MinSize
1160 :> QueryParam "maxTermSize" MaxSize
1161 :> QueryParam "orderBy" OrderBy
1162 :> QueryParam "search" Text
1163 :> Get '[JSON] (Versioned NgramsTable)
1165 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1166 :> QueryParamR "ngramsType" TabType
1167 :> QueryParamR "list" ListId
1168 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1169 :> Put '[JSON] (Versioned NgramsTablePatch)
1171 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1172 :> QueryParamR "ngramsType" TabType
1173 :> QueryParamR "list" ListId
1174 :> QueryParam "listType" ListType
1175 :> ReqBody '[JSON] [NgramsTerm]
1178 type TableNgramsApi = TableNgramsApiGet
1179 :<|> TableNgramsApiPut
1180 :<|> TableNgramsApiPost
1182 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1183 => NodeId -> TabType
1184 -> ListId -> Limit -> Maybe Offset
1186 -> Maybe MinSize -> Maybe MaxSize
1188 -> Maybe Text -- full text search
1189 -> m (Versioned NgramsTable)
1190 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1191 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1193 searchQuery = maybe (const True) isInfixOf mt
1195 -- | Text search is deactivated for now for ngrams by doc only
1196 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1198 -> ListId -> Limit -> Maybe Offset
1200 -> Maybe MinSize -> Maybe MaxSize
1202 -> Maybe Text -- full text search
1203 -> m (Versioned NgramsTable)
1204 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1205 ns <- selectNodesWithUsername NodeList userMaster
1206 let ngramsType = ngramsTypeFromTabType tabType
1207 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1208 let searchQuery = flip S.member (S.fromList ngs)
1209 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1213 apiNgramsTableCorpus :: ( RepoCmdM env err m
1215 , HasInvalidError err
1216 , HasConnectionPool env
1218 => NodeId -> ServerT TableNgramsApi m
1219 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1221 :<|> tableNgramsPost
1224 apiNgramsTableDoc :: ( RepoCmdM env err m
1226 , HasInvalidError err
1227 , HasConnectionPool env
1229 => DocId -> ServerT TableNgramsApi m
1230 apiNgramsTableDoc dId = getTableNgramsDoc dId
1232 :<|> tableNgramsPost
1233 -- > add new ngrams in database (TODO AD)
1234 -- > index all the corpus accordingly (TODO AD)
1236 listNgramsChangedSince :: RepoCmdM env err m
1237 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1238 listNgramsChangedSince listId ngramsType version
1240 Versioned <$> currentVersion <*> pure True
1242 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1245 instance Arbitrary NgramsRepoElement where
1246 arbitrary = elements $ map ngramsElementToRepo ns
1248 NgramsTable ns = mockTable
1251 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1253 parseUrlPiece x = maybeToEither x (decode $ cs x)