1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
47 , apiNgramsTableCorpus
69 , NgramsRepoElement(..)
78 , ngramsTypeFromTabType
95 , listNgramsChangedSince
99 import Control.Category ((>>>))
100 import Control.Concurrent
101 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
102 import Control.Monad.Base (MonadBase, liftBase)
103 import Control.Monad.Error.Class (MonadError)
104 import Control.Monad.Reader
105 import Control.Monad.State
106 import Control.Monad.Trans.Control (MonadBaseControl)
107 import Data.Aeson hiding ((.=))
108 import Data.Aeson.TH (deriveJSON)
109 import Data.Either(Either(Left))
110 import Data.Either.Extra (maybeToEither)
112 import Data.Map.Strict (Map)
113 import Data.Maybe (fromMaybe)
115 import Data.Ord (Down(..))
116 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours)
117 import Data.Set (Set)
118 import Data.Swagger hiding (version, patch)
119 import Data.Text (Text, isInfixOf, count)
121 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
122 import Formatting (hprint, int, (%))
123 import Formatting.Clock (timeSpecs)
124 import GHC.Generics (Generic)
125 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
126 import Gargantext.Core.Types (TODO)
127 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
128 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
129 import Gargantext.Database.Action.Query.Ngrams
130 import Gargantext.Database.Action.Query.Node.Select
131 import Gargantext.Database.Admin.Config (userMaster)
132 import Gargantext.Database.Admin.Types.Errors (HasNodeError)
133 import Gargantext.Database.Admin.Types.Node (NodeType(..))
134 import Gargantext.Database.Admin.Utils (fromField', HasConnectionPool)
135 import Gargantext.Database.Schema.Ngrams (NgramsType)
136 import Gargantext.Prelude
137 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
138 import Servant hiding (Patch)
139 import System.Clock (getTime, TimeSpec, Clock(..))
140 import System.FileLock (FileLock)
141 import System.IO (stderr)
142 import Test.QuickCheck (elements)
143 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
144 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
145 import qualified Data.List as List
146 import qualified Data.Map.Strict as Map
147 import qualified Data.Map.Strict.Patch as PM
148 import qualified Data.Set as S
149 import qualified Data.Set as Set
150 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
152 ------------------------------------------------------------------------
153 --data FacetFormat = Table | Chart
154 data TabType = Docs | Trash | MoreFav | MoreTrash
155 | Terms | Sources | Authors | Institutes
157 deriving (Generic, Enum, Bounded, Show)
159 instance FromHttpApiData TabType
161 parseUrlPiece "Docs" = pure Docs
162 parseUrlPiece "Trash" = pure Trash
163 parseUrlPiece "MoreFav" = pure MoreFav
164 parseUrlPiece "MoreTrash" = pure MoreTrash
166 parseUrlPiece "Terms" = pure Terms
167 parseUrlPiece "Sources" = pure Sources
168 parseUrlPiece "Institutes" = pure Institutes
169 parseUrlPiece "Authors" = pure Authors
171 parseUrlPiece "Contacts" = pure Contacts
173 parseUrlPiece _ = Left "Unexpected value of TabType"
175 instance ToParamSchema TabType
176 instance ToJSON TabType
177 instance FromJSON TabType
178 instance ToSchema TabType
179 instance Arbitrary TabType
181 arbitrary = elements [minBound .. maxBound]
183 newtype MSet a = MSet (Map a ())
184 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
186 instance ToJSON a => ToJSON (MSet a) where
187 toJSON (MSet m) = toJSON (Map.keys m)
188 toEncoding (MSet m) = toEncoding (Map.keys m)
190 mSetFromSet :: Set a -> MSet a
191 mSetFromSet = MSet . Map.fromSet (const ())
193 mSetFromList :: Ord a => [a] -> MSet a
194 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
196 -- mSetToSet :: Ord a => MSet a -> Set a
197 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
198 mSetToSet :: Ord a => MSet a -> Set a
199 mSetToSet = Set.fromList . mSetToList
201 mSetToList :: MSet a -> [a]
202 mSetToList (MSet a) = Map.keys a
204 instance Foldable MSet where
205 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
207 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
208 parseJSON = fmap mSetFromList . parseJSON
210 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
212 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
214 ------------------------------------------------------------------------
215 type NgramsTerm = Text
217 data RootParent = RootParent
218 { _rp_root :: NgramsTerm
219 , _rp_parent :: NgramsTerm
221 deriving (Ord, Eq, Show, Generic)
223 deriveJSON (unPrefix "_rp_") ''RootParent
224 makeLenses ''RootParent
226 data NgramsRepoElement = NgramsRepoElement
228 , _nre_list :: ListType
229 --, _nre_root_parent :: Maybe RootParent
230 , _nre_root :: Maybe NgramsTerm
231 , _nre_parent :: Maybe NgramsTerm
232 , _nre_children :: MSet NgramsTerm
234 deriving (Ord, Eq, Show, Generic)
236 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
237 makeLenses ''NgramsRepoElement
239 instance ToSchema NgramsRepoElement where
240 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
244 NgramsElement { _ne_ngrams :: NgramsTerm
246 , _ne_list :: ListType
247 , _ne_occurrences :: Int
248 , _ne_root :: Maybe NgramsTerm
249 , _ne_parent :: Maybe NgramsTerm
250 , _ne_children :: MSet NgramsTerm
252 deriving (Ord, Eq, Show, Generic)
254 deriveJSON (unPrefix "_ne_") ''NgramsElement
255 makeLenses ''NgramsElement
257 mkNgramsElement :: NgramsTerm
262 mkNgramsElement ngrams list rp children =
263 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
266 size = 1 + count " " ngrams
268 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
269 newNgramsElement mayList ngrams =
270 mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
272 instance ToSchema NgramsElement where
273 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
274 instance Arbitrary NgramsElement where
275 arbitrary = elements [newNgramsElement Nothing "sport"]
277 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
279 (NgramsElement { _ne_size = s
293 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
294 ngramsElementFromRepo
303 NgramsElement { _ne_size = s
308 , _ne_ngrams = ngrams
309 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
311 -- Here we could use 0 if we want to avoid any `panic`.
312 -- It will not happen using getTableNgrams if
313 -- getOccByNgramsOnly provides a count of occurrences for
314 -- all the ngrams given.
318 ------------------------------------------------------------------------
319 newtype NgramsTable = NgramsTable [NgramsElement]
320 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
322 type NgramsList = NgramsTable
324 makePrisms ''NgramsTable
326 -- | Question: why these repetition of Type in this instance
327 -- may you document it please ?
328 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
329 each = _NgramsTable . each
332 -- | TODO Check N and Weight
334 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
335 toNgramsElement ns = map toNgramsElement' ns
337 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
341 Just x -> lookup x mapParent
342 c' = maybe mempty identity $ lookup t mapChildren
343 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
345 mapParent :: Map Int Text
346 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
348 mapChildren :: Map Text (Set Text)
349 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
350 $ Map.fromListWith (<>)
351 $ map (first fromJust)
352 $ filter (isJust . fst)
353 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
356 mockTable :: NgramsTable
357 mockTable = NgramsTable
358 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
359 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
360 , mkNgramsElement "cats" StopTerm Nothing mempty
361 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
362 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
363 , mkNgramsElement "fox" GraphTerm Nothing mempty
364 , mkNgramsElement "object" CandidateTerm Nothing mempty
365 , mkNgramsElement "nothing" StopTerm Nothing mempty
366 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
367 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
368 , mkNgramsElement "moon" CandidateTerm Nothing mempty
369 , mkNgramsElement "sky" StopTerm Nothing mempty
372 rp n = Just $ RootParent n n
374 instance Arbitrary NgramsTable where
375 arbitrary = pure mockTable
377 instance ToSchema NgramsTable
379 ------------------------------------------------------------------------
380 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
381 ------------------------------------------------------------------------
382 -- On the Client side:
383 --data Action = InGroup NgramsId NgramsId
384 -- | OutGroup NgramsId NgramsId
385 -- | SetListType NgramsId ListType
387 data PatchSet a = PatchSet
391 deriving (Eq, Ord, Show, Generic)
393 makeLenses ''PatchSet
394 makePrisms ''PatchSet
396 instance ToJSON a => ToJSON (PatchSet a) where
397 toJSON = genericToJSON $ unPrefix "_"
398 toEncoding = genericToEncoding $ unPrefix "_"
400 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
401 parseJSON = genericParseJSON $ unPrefix "_"
404 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
405 arbitrary = PatchSet <$> arbitrary <*> arbitrary
407 type instance Patched (PatchSet a) = Set a
409 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
410 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
412 instance Ord a => Semigroup (PatchSet a) where
413 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
414 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
417 instance Ord a => Monoid (PatchSet a) where
418 mempty = PatchSet mempty mempty
420 instance Ord a => Group (PatchSet a) where
421 invert (PatchSet r a) = PatchSet a r
423 instance Ord a => Composable (PatchSet a) where
424 composable _ _ = undefined
426 instance Ord a => Action (PatchSet a) (Set a) where
427 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
429 instance Applicable (PatchSet a) (Set a) where
430 applicable _ _ = mempty
432 instance Ord a => Validity (PatchSet a) where
433 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
435 instance Ord a => Transformable (PatchSet a) where
436 transformable = undefined
438 conflicts _p _q = undefined
440 transformWith conflict p q = undefined conflict p q
442 instance ToSchema a => ToSchema (PatchSet a)
445 type AddRem = Replace (Maybe ())
447 remPatch, addPatch :: AddRem
448 remPatch = replace (Just ()) Nothing
449 addPatch = replace Nothing (Just ())
451 isRem :: Replace (Maybe ()) -> Bool
452 isRem = (== remPatch)
454 type PatchMap = PM.PatchMap
456 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
457 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
458 Transformable, Composable)
460 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
461 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
463 -- TODO this breaks module abstraction
464 makePrisms ''PM.PatchMap
466 makePrisms ''PatchMSet
468 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
469 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
471 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
472 f = Map.partition isRem >>> both %~ Map.keysSet
474 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
475 g (rems, adds) = Map.fromSet (const remPatch) rems
476 <> Map.fromSet (const addPatch) adds
478 instance Ord a => Action (PatchMSet a) (MSet a) where
479 act (PatchMSet p) (MSet m) = MSet $ act p m
481 instance Ord a => Applicable (PatchMSet a) (MSet a) where
482 applicable (PatchMSet p) (MSet m) = applicable p m
484 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
485 toJSON = toJSON . view _PatchMSetIso
486 toEncoding = toEncoding . view _PatchMSetIso
488 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
489 parseJSON = fmap (_PatchMSetIso #) . parseJSON
491 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
492 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
494 instance ToSchema a => ToSchema (PatchMSet a) where
496 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
498 type instance Patched (PatchMSet a) = MSet a
500 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
501 arbitrary = uncurry replace <$> arbitrary
502 -- If they happen to be equal then the patch is Keep.
504 instance ToSchema a => ToSchema (Replace a) where
505 declareNamedSchema (_ :: Proxy (Replace a)) = do
506 -- TODO Keep constructor is not supported here.
507 aSchema <- declareSchemaRef (Proxy :: Proxy a)
508 return $ NamedSchema (Just "Replace") $ mempty
509 & type_ ?~ SwaggerObject
511 InsOrdHashMap.fromList
515 & required .~ [ "old", "new" ]
518 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
519 , _patch_list :: Replace ListType -- TODO Map UserId ListType
521 deriving (Eq, Show, Generic)
523 deriveJSON (unPrefix "_") ''NgramsPatch
524 makeLenses ''NgramsPatch
526 instance ToSchema NgramsPatch where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
529 instance Arbitrary NgramsPatch where
530 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
532 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
534 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
535 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
537 instance Semigroup NgramsPatch where
538 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
540 instance Monoid NgramsPatch where
541 mempty = _NgramsPatch # mempty
543 instance Validity NgramsPatch where
544 validate p = p ^. _NgramsPatch . to validate
546 instance Transformable NgramsPatch where
547 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
549 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
551 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
553 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
555 type ConflictResolutionNgramsPatch =
556 ( ConflictResolutionPatchMSet NgramsTerm
557 , ConflictResolutionReplace ListType
559 type instance ConflictResolution NgramsPatch =
560 ConflictResolutionNgramsPatch
562 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
563 -- ~ Patched NgramsPatchIso
564 type instance Patched NgramsPatch = PatchedNgramsPatch
566 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
567 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
568 applicable p (Just nre) =
569 applicable (p ^. patch_children) (nre ^. nre_children) <>
570 applicable (p ^. patch_list) (nre ^. nre_list)
572 instance Action NgramsPatch NgramsRepoElement where
573 act p = (nre_children %~ act (p ^. patch_children))
574 . (nre_list %~ act (p ^. patch_list))
576 instance Action NgramsPatch (Maybe NgramsRepoElement) where
579 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
580 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
582 instance FromField NgramsTablePatch
584 fromField = fromField'
586 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
588 fromField = fromField'
590 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
592 type instance ConflictResolution NgramsTablePatch =
593 NgramsTerm -> ConflictResolutionNgramsPatch
595 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
596 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
597 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
599 makePrisms ''NgramsTablePatch
600 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
601 instance ToSchema NgramsTablePatch
603 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
604 applicable p = applicable (p ^. _NgramsTablePatch)
606 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
608 fmap (execState (reParentNgramsTablePatch p)) .
609 act (p ^. _NgramsTablePatch)
611 instance Arbitrary NgramsTablePatch where
612 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
614 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
615 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
616 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
618 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
620 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
621 reRootChildren root ngram = do
622 nre <- use $ at ngram
623 forOf_ (_Just . nre_children . folded) nre $ \child -> do
624 at child . _Just . nre_root ?= root
625 reRootChildren root child
627 reParent :: Maybe RootParent -> ReParent NgramsTerm
628 reParent rp child = do
629 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
630 . (nre_root .~ (_rp_root <$> rp))
632 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
634 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
635 reParentAddRem rp child p =
636 reParent (if isRem p then Nothing else Just rp) child
638 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
639 reParentNgramsPatch parent ngramsPatch = do
640 root_of_parent <- use (at parent . _Just . nre_root)
642 root = fromMaybe parent root_of_parent
643 rp = RootParent { _rp_root = root, _rp_parent = parent }
644 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
645 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
647 reParentNgramsTablePatch :: ReParent NgramsTablePatch
648 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
649 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
651 ------------------------------------------------------------------------
652 ------------------------------------------------------------------------
655 data Versioned a = Versioned
656 { _v_version :: Version
659 deriving (Generic, Show, Eq)
660 deriveJSON (unPrefix "_v_") ''Versioned
661 makeLenses ''Versioned
662 instance ToSchema a => ToSchema (Versioned a) where
663 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
664 instance Arbitrary a => Arbitrary (Versioned a) where
665 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
669 -- TODO sequences of modifications (Patchs)
670 type NgramsIdPatch = Patch NgramsId NgramsPatch
672 ngramsPatch :: Int -> NgramsPatch
673 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
675 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
676 toEdit n p = Edit n p
677 ngramsIdPatch :: Patch NgramsId NgramsPatch
678 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
679 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
680 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
683 -- applyPatchBack :: Patch -> IO Patch
684 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
686 ------------------------------------------------------------------------
687 ------------------------------------------------------------------------
688 ------------------------------------------------------------------------
691 -- TODO: Replace.old is ignored which means that if the current list
692 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
693 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
694 -- However this should not happen in non conflicting situations.
695 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
696 mkListsUpdate nt patches =
697 [ (ngramsTypeId nt, ng, listTypeId lt)
698 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
699 , lt <- patch ^.. patch_list . new
702 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
705 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
706 mkChildrenGroups addOrRem nt patches =
707 [ (ngramsTypeId nt, parent, child)
708 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
709 , child <- patch ^.. patch_children . to addOrRem . folded
713 ngramsTypeFromTabType :: TabType -> NgramsType
714 ngramsTypeFromTabType tabType =
715 let lieu = "Garg.API.Ngrams: " :: Text in
717 Sources -> Ngrams.Sources
718 Authors -> Ngrams.Authors
719 Institutes -> Ngrams.Institutes
720 Terms -> Ngrams.NgramsTerms
721 _ -> panic $ lieu <> "No Ngrams for this tab"
722 -- TODO: This `panic` would disapear with custom NgramsType.
724 ------------------------------------------------------------------------
726 { _r_version :: Version
729 -- first patch in the list is the most recent
733 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
734 parseJSON = genericParseJSON $ unPrefix "_r_"
736 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
737 toJSON = genericToJSON $ unPrefix "_r_"
738 toEncoding = genericToEncoding $ unPrefix "_r_"
742 initRepo :: Monoid s => Repo s p
743 initRepo = Repo 1 mempty []
745 type NgramsRepo = Repo NgramsState NgramsStatePatch
746 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
747 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
749 initMockRepo :: NgramsRepo
750 initMockRepo = Repo 1 s []
752 s = Map.singleton Ngrams.NgramsTerms
753 $ Map.singleton 47254
755 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
757 data RepoEnv = RepoEnv
758 { _renv_var :: !(MVar NgramsRepo)
759 , _renv_saver :: !(IO ())
760 , _renv_lock :: !FileLock
766 class HasRepoVar env where
767 repoVar :: Getter env (MVar NgramsRepo)
769 instance HasRepoVar (MVar NgramsRepo) where
772 class HasRepoSaver env where
773 repoSaver :: Getter env (IO ())
775 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
776 repoEnv :: Getter env RepoEnv
778 instance HasRepo RepoEnv where
781 instance HasRepoVar RepoEnv where
784 instance HasRepoSaver RepoEnv where
785 repoSaver = renv_saver
787 type RepoCmdM env err m =
790 , MonadBaseControl IO m
793 ------------------------------------------------------------------------
795 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
797 saveRepo = liftBase =<< view repoSaver
799 listTypeConflictResolution :: ListType -> ListType -> ListType
800 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
802 ngramsStatePatchConflictResolution
803 :: NgramsType -> NodeId -> NgramsTerm
804 -> ConflictResolutionNgramsPatch
805 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
807 -- undefined {- TODO think this through -}, listTypeConflictResolution)
810 -- Insertions are not considered as patches,
811 -- they do not extend history,
812 -- they do not bump version.
813 insertNewOnly :: a -> Maybe b -> a
814 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
815 -- TODO error handling
817 something :: Monoid a => Maybe a -> a
818 something Nothing = mempty
819 something (Just a) = a
822 -- TODO refactor with putListNgrams
823 copyListNgrams :: RepoCmdM env err m
824 => NodeId -> NodeId -> NgramsType
826 copyListNgrams srcListId dstListId ngramsType = do
828 liftBase $ modifyMVar_ var $
829 pure . (r_state . at ngramsType %~ (Just . f . something))
832 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
833 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
835 -- TODO refactor with putListNgrams
836 -- The list must be non-empty!
837 -- The added ngrams must be non-existent!
838 addListNgrams :: RepoCmdM env err m
839 => NodeId -> NgramsType
840 -> [NgramsElement] -> m ()
841 addListNgrams listId ngramsType nes = do
843 liftBase $ modifyMVar_ var $
844 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
847 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
850 rmListNgrams :: RepoCmdM env err m
854 rmListNgrams l nt = setListNgrams l nt mempty
856 -- | TODO: incr the Version number
857 -- && should use patch
858 setListNgrams :: RepoCmdM env err m
861 -> Map NgramsTerm NgramsRepoElement
863 setListNgrams listId ngramsType ns = do
865 liftBase $ modifyMVar_ var $
869 (at listId .~ ( Just ns))
876 -- If the given list of ngrams elements contains ngrams already in
877 -- the repo, they will be ignored.
878 putListNgrams :: RepoCmdM env err m
879 => NodeId -> NgramsType
880 -> [NgramsElement] -> m ()
881 putListNgrams _ _ [] = pure ()
882 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
884 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
886 putListNgrams' :: RepoCmdM env err m
887 => ListId -> NgramsType
888 -> Map NgramsTerm NgramsRepoElement
890 putListNgrams' listId ngramsType ns = do
891 -- printDebug "putListNgrams" (length nes)
893 liftBase $ modifyMVar_ var $
910 tableNgramsPost :: RepoCmdM env err m
914 -> [NgramsTerm] -> m ()
915 tableNgramsPost tabType listId mayList =
916 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
918 currentVersion :: RepoCmdM env err m
922 r <- liftBase $ readMVar var
923 pure $ r ^. r_version
925 tableNgramsPull :: RepoCmdM env err m
926 => ListId -> NgramsType
928 -> m (Versioned NgramsTablePatch)
929 tableNgramsPull listId ngramsType p_version = do
931 r <- liftBase $ readMVar var
934 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
935 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
937 pure (Versioned (r ^. r_version) q_table)
939 -- Apply the given patch to the DB and returns the patch to be applied on the
942 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
944 -> Versioned NgramsTablePatch
945 -> m (Versioned NgramsTablePatch)
946 tableNgramsPut tabType listId (Versioned p_version p_table)
947 | p_table == mempty = do
948 let ngramsType = ngramsTypeFromTabType tabType
949 tableNgramsPull listId ngramsType p_version
952 let ngramsType = ngramsTypeFromTabType tabType
953 (p0, p0_validity) = PM.singleton listId p_table
954 (p, p_validity) = PM.singleton ngramsType p0
956 assertValid p0_validity
957 assertValid p_validity
960 vq' <- liftBase $ modifyMVar var $ \r -> do
962 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
963 (p', q') = transformWith ngramsStatePatchConflictResolution p q
964 r' = r & r_version +~ 1
966 & r_history %~ (p' :)
967 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
969 -- Ideally we would like to check these properties. However:
970 -- * They should be checked only to debug the code. The client data
971 -- should be able to trigger these.
972 -- * What kind of error should they throw (we are in IO here)?
973 -- * Should we keep modifyMVar?
974 -- * Should we throw the validation in an Exception, catch it around
975 -- modifyMVar and throw it back as an Error?
976 assertValid $ transformable p q
977 assertValid $ applicable p' (r ^. r_state)
979 pure (r', Versioned (r' ^. r_version) q'_table)
984 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
985 mergeNgramsElement _neOld neNew = neNew
987 { _ne_list :: ListType
988 If we merge the parents/children we can potentially create cycles!
989 , _ne_parent :: Maybe NgramsTerm
990 , _ne_children :: MSet NgramsTerm
994 getNgramsTableMap :: RepoCmdM env err m
997 -> m (Versioned NgramsTableMap)
998 getNgramsTableMap nodeId ngramsType = do
1000 repo <- liftBase $ readMVar v
1001 pure $ Versioned (repo ^. r_version)
1002 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1007 -- | TODO Errors management
1008 -- TODO: polymorphic for Annuaire or Corpus or ...
1009 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1010 -- TODO: should take only one ListId
1012 getTime' :: MonadBase IO m => m TimeSpec
1013 getTime' = liftBase $ getTime ProcessCPUTime
1016 getTableNgrams :: forall env err m.
1017 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1018 => NodeType -> NodeId -> TabType
1019 -> ListId -> Limit -> Maybe Offset
1021 -> Maybe MinSize -> Maybe MaxSize
1023 -> (NgramsTerm -> Bool)
1024 -> m (Versioned NgramsTable)
1025 getTableNgrams _nType nId tabType listId limit_ offset
1026 listType minSize maxSize orderBy searchQuery = do
1029 -- lIds <- selectNodesWithUsername NodeList userMaster
1031 ngramsType = ngramsTypeFromTabType tabType
1032 offset' = maybe 0 identity offset
1033 listType' = maybe (const True) (==) listType
1034 minSize' = maybe (const True) (<=) minSize
1035 maxSize' = maybe (const True) (>=) maxSize
1037 selected_node n = minSize' s
1039 && searchQuery (n ^. ne_ngrams)
1040 && listType' (n ^. ne_list)
1044 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1046 ---------------------------------------
1047 sortOnOrder Nothing = identity
1048 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1049 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1050 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1051 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1053 ---------------------------------------
1054 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1055 selectAndPaginate tableMap = roots <> inners
1057 list = tableMap ^.. each
1058 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1060 selected_nodes = list & take limit_
1062 . filter selected_node
1063 . sortOnOrder orderBy
1064 roots = rootOf <$> selected_nodes
1065 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1066 inners = list & filter (selected_inner rootsSet)
1068 ---------------------------------------
1069 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1070 setScores False table = pure table
1071 setScores True table = do
1072 let ngrams_terms = (table ^.. each . ne_ngrams)
1074 occurrences <- getOccByNgramsOnlyFast' nId
1079 liftBase $ hprint stderr
1080 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1081 (length ngrams_terms) t1 t2
1083 occurrences <- getOccByNgramsOnlySlow nType nId
1089 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1091 pure $ table & each %~ setOcc
1092 ---------------------------------------
1094 -- lists <- catMaybes <$> listsWith userMaster
1095 -- trace (show lists) $
1096 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1098 let scoresNeeded = needsScores orderBy
1099 tableMap1 <- getNgramsTableMap listId ngramsType
1101 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1102 . Map.mapWithKey ngramsElementFromRepo
1104 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1105 . setScores (not scoresNeeded)
1108 liftBase $ hprint stderr
1109 ("getTableNgrams total=" % timeSpecs
1110 % " map1=" % timeSpecs
1111 % " map2=" % timeSpecs
1112 % " map3=" % timeSpecs
1113 % " sql=" % (if scoresNeeded then "map2" else "map3")
1115 ) t0 t3 t0 t1 t1 t2 t2 t3
1121 -- TODO: find a better place for the code above, All APIs stay here
1122 type QueryParamR = QueryParam' '[Required, Strict]
1124 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1125 deriving (Generic, Enum, Bounded, Read, Show)
1127 instance FromHttpApiData OrderBy
1129 parseUrlPiece "TermAsc" = pure TermAsc
1130 parseUrlPiece "TermDesc" = pure TermDesc
1131 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1132 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1133 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1136 instance ToParamSchema OrderBy
1137 instance FromJSON OrderBy
1138 instance ToJSON OrderBy
1139 instance ToSchema OrderBy
1140 instance Arbitrary OrderBy
1142 arbitrary = elements [minBound..maxBound]
1144 needsScores :: Maybe OrderBy -> Bool
1145 needsScores (Just ScoreAsc) = True
1146 needsScores (Just ScoreDesc) = True
1147 needsScores _ = False
1149 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1150 :> QueryParamR "ngramsType" TabType
1151 :> QueryParamR "list" ListId
1152 :> QueryParamR "limit" Limit
1153 :> QueryParam "offset" Offset
1154 :> QueryParam "listType" ListType
1155 :> QueryParam "minTermSize" MinSize
1156 :> QueryParam "maxTermSize" MaxSize
1157 :> QueryParam "orderBy" OrderBy
1158 :> QueryParam "search" Text
1159 :> Get '[JSON] (Versioned NgramsTable)
1161 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1162 :> QueryParamR "ngramsType" TabType
1163 :> QueryParamR "list" ListId
1164 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1165 :> Put '[JSON] (Versioned NgramsTablePatch)
1167 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1168 :> QueryParamR "ngramsType" TabType
1169 :> QueryParamR "list" ListId
1170 :> QueryParam "listType" ListType
1171 :> ReqBody '[JSON] [NgramsTerm]
1174 type TableNgramsApi = TableNgramsApiGet
1175 :<|> TableNgramsApiPut
1176 :<|> TableNgramsApiPost
1178 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1179 => NodeId -> TabType
1180 -> ListId -> Limit -> Maybe Offset
1182 -> Maybe MinSize -> Maybe MaxSize
1184 -> Maybe Text -- full text search
1185 -> m (Versioned NgramsTable)
1186 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1187 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1189 searchQuery = maybe (const True) isInfixOf mt
1191 -- | Text search is deactivated for now for ngrams by doc only
1192 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1194 -> ListId -> Limit -> Maybe Offset
1196 -> Maybe MinSize -> Maybe MaxSize
1198 -> Maybe Text -- full text search
1199 -> m (Versioned NgramsTable)
1200 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1201 ns <- selectNodesWithUsername NodeList userMaster
1202 let ngramsType = ngramsTypeFromTabType tabType
1203 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1204 let searchQuery = flip S.member (S.fromList ngs)
1205 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1209 apiNgramsTableCorpus :: ( RepoCmdM env err m
1211 , HasInvalidError err
1212 , HasConnectionPool env
1214 => NodeId -> ServerT TableNgramsApi m
1215 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1217 :<|> tableNgramsPost
1220 apiNgramsTableDoc :: ( RepoCmdM env err m
1222 , HasInvalidError err
1223 , HasConnectionPool env
1225 => DocId -> ServerT TableNgramsApi m
1226 apiNgramsTableDoc dId = getTableNgramsDoc dId
1228 :<|> tableNgramsPost
1229 -- > add new ngrams in database (TODO AD)
1230 -- > index all the corpus accordingly (TODO AD)
1232 listNgramsChangedSince :: RepoCmdM env err m
1233 => ListId -> NgramsType -> Version -> m (Versioned Bool)
1234 listNgramsChangedSince listId ngramsType version
1236 Versioned <$> currentVersion <*> pure True
1238 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1241 instance Arbitrary NgramsRepoElement where
1242 arbitrary = elements $ map ngramsElementToRepo ns
1244 NgramsTable ns = mockTable
1247 instance FromHttpApiData (Map NgramsType (Versioned NgramsTableMap))
1249 parseUrlPiece x = maybeToEither x (decode $ cs x)