1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
47 , apiNgramsTableCorpus
69 , NgramsRepoElement(..)
78 , ngramsTypeFromTabType
95 , listNgramsChangedSince
99 -- import Debug.Trace (trace)
100 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
101 -- import Gargantext.Database.Schema.User (UserId)
102 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
103 Composable(..), Transformable(..),
104 PairPatch(..), Patched, ConflictResolution,
105 ConflictResolutionReplace, ours)
106 import qualified Data.Map.Strict.Patch as PM
108 import Data.Ord (Down(..))
110 --import Data.Semigroup
111 import Data.Set (Set)
112 import qualified Data.Set as S
113 import qualified Data.List as List
114 import Data.Maybe (fromMaybe)
115 -- import Data.Tuple.Extra (first)
116 import qualified Data.Map.Strict as Map
117 import Data.Map.Strict (Map)
118 import qualified Data.Set as Set
119 import Control.Category ((>>>))
120 import Control.Concurrent
121 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
122 import Control.Monad.Error.Class (MonadError)
123 import Control.Monad.Reader
124 import Control.Monad.State
125 import Control.Monad.Trans.Control (MonadBaseControl)
126 import Data.Aeson hiding ((.=))
127 import Data.Aeson.TH (deriveJSON)
128 import Data.Either(Either(Left))
129 import Data.Either.Extra (maybeToEither)
130 -- import Data.Map (lookup)
131 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
132 import Data.Swagger hiding (version, patch)
133 import Data.Text (Text, isInfixOf, count)
135 import Formatting (hprint, int, (%))
136 import Formatting.Clock (timeSpecs)
137 import GHC.Generics (Generic)
138 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
139 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
140 import Gargantext.Database.Config (userMaster)
141 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
142 import Gargantext.Database.Schema.Ngrams (NgramsType)
143 import Gargantext.Database.Types.Node (NodeType(..))
144 import Gargantext.Database.Utils (fromField', HasConnectionPool)
145 import Gargantext.Database.Node.Select
146 import Gargantext.Database.Ngrams
147 --import Gargantext.Database.Lists (listsWith)
148 import Gargantext.Database.Schema.Node (HasNodeError)
149 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
150 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
151 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
152 import Gargantext.Prelude
153 import Gargantext.Core.Types (TODO)
154 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
155 import Servant hiding (Patch)
156 import System.Clock (getTime, TimeSpec, Clock(..))
157 import System.FileLock (FileLock)
158 import System.IO (stderr)
159 import Test.QuickCheck (elements)
160 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
162 ------------------------------------------------------------------------
163 --data FacetFormat = Table | Chart
164 data TabType = Docs | Trash | MoreFav | MoreTrash
165 | Terms | Sources | Authors | Institutes
167 deriving (Generic, Enum, Bounded, Show)
169 instance FromHttpApiData TabType
171 parseUrlPiece "Docs" = pure Docs
172 parseUrlPiece "Trash" = pure Trash
173 parseUrlPiece "MoreFav" = pure MoreFav
174 parseUrlPiece "MoreTrash" = pure MoreTrash
176 parseUrlPiece "Terms" = pure Terms
177 parseUrlPiece "Sources" = pure Sources
178 parseUrlPiece "Institutes" = pure Institutes
179 parseUrlPiece "Authors" = pure Authors
181 parseUrlPiece "Contacts" = pure Contacts
183 parseUrlPiece _ = Left "Unexpected value of TabType"
185 instance ToParamSchema TabType
186 instance ToJSON TabType
187 instance FromJSON TabType
188 instance ToSchema TabType
189 instance Arbitrary TabType
191 arbitrary = elements [minBound .. maxBound]
193 newtype MSet a = MSet (Map a ())
194 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
196 instance ToJSON a => ToJSON (MSet a) where
197 toJSON (MSet m) = toJSON (Map.keys m)
198 toEncoding (MSet m) = toEncoding (Map.keys m)
200 mSetFromSet :: Set a -> MSet a
201 mSetFromSet = MSet . Map.fromSet (const ())
203 mSetFromList :: Ord a => [a] -> MSet a
204 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
206 -- mSetToSet :: Ord a => MSet a -> Set a
207 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
208 mSetToSet :: Ord a => MSet a -> Set a
209 mSetToSet = Set.fromList . mSetToList
211 mSetToList :: MSet a -> [a]
212 mSetToList (MSet a) = Map.keys a
214 instance Foldable MSet where
215 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
217 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
218 parseJSON = fmap mSetFromList . parseJSON
220 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
222 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
224 ------------------------------------------------------------------------
225 type NgramsTerm = Text
227 data RootParent = RootParent
228 { _rp_root :: NgramsTerm
229 , _rp_parent :: NgramsTerm
231 deriving (Ord, Eq, Show, Generic)
233 deriveJSON (unPrefix "_rp_") ''RootParent
234 makeLenses ''RootParent
236 data NgramsRepoElement = NgramsRepoElement
238 , _nre_list :: ListType
239 --, _nre_root_parent :: Maybe RootParent
240 , _nre_root :: Maybe NgramsTerm
241 , _nre_parent :: Maybe NgramsTerm
242 , _nre_children :: MSet NgramsTerm
244 deriving (Ord, Eq, Show, Generic)
246 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
247 makeLenses ''NgramsRepoElement
249 instance ToSchema NgramsRepoElement where
250 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
254 NgramsElement { _ne_ngrams :: NgramsTerm
256 , _ne_list :: ListType
257 , _ne_occurrences :: Int
258 , _ne_root :: Maybe NgramsTerm
259 , _ne_parent :: Maybe NgramsTerm
260 , _ne_children :: MSet NgramsTerm
262 deriving (Ord, Eq, Show, Generic)
264 deriveJSON (unPrefix "_ne_") ''NgramsElement
265 makeLenses ''NgramsElement
267 mkNgramsElement :: NgramsTerm
272 mkNgramsElement ngrams list rp children =
273 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
276 size = 1 + count " " ngrams
278 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
279 newNgramsElement mayList ngrams =
280 mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
282 instance ToSchema NgramsElement where
283 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
284 instance Arbitrary NgramsElement where
285 arbitrary = elements [newNgramsElement Nothing "sport"]
287 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
289 (NgramsElement { _ne_size = s
303 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
304 ngramsElementFromRepo
313 NgramsElement { _ne_size = s
318 , _ne_ngrams = ngrams
319 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
321 -- Here we could use 0 if we want to avoid any `panic`.
322 -- It will not happen using getTableNgrams if
323 -- getOccByNgramsOnly provides a count of occurrences for
324 -- all the ngrams given.
328 ------------------------------------------------------------------------
329 newtype NgramsTable = NgramsTable [NgramsElement]
330 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
332 type NgramsList = NgramsTable
334 makePrisms ''NgramsTable
336 -- | Question: why these repetition of Type in this instance
337 -- may you document it please ?
338 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
339 each = _NgramsTable . each
342 -- | TODO Check N and Weight
344 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
345 toNgramsElement ns = map toNgramsElement' ns
347 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
351 Just x -> lookup x mapParent
352 c' = maybe mempty identity $ lookup t mapChildren
353 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
355 mapParent :: Map Int Text
356 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
358 mapChildren :: Map Text (Set Text)
359 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
360 $ Map.fromListWith (<>)
361 $ map (first fromJust)
362 $ filter (isJust . fst)
363 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
366 mockTable :: NgramsTable
367 mockTable = NgramsTable
368 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
369 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
370 , mkNgramsElement "cats" StopTerm Nothing mempty
371 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
372 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
373 , mkNgramsElement "fox" GraphTerm Nothing mempty
374 , mkNgramsElement "object" CandidateTerm Nothing mempty
375 , mkNgramsElement "nothing" StopTerm Nothing mempty
376 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
377 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
378 , mkNgramsElement "moon" CandidateTerm Nothing mempty
379 , mkNgramsElement "sky" StopTerm Nothing mempty
382 rp n = Just $ RootParent n n
384 instance Arbitrary NgramsTable where
385 arbitrary = pure mockTable
387 instance ToSchema NgramsTable
389 ------------------------------------------------------------------------
390 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
391 ------------------------------------------------------------------------
392 -- On the Client side:
393 --data Action = InGroup NgramsId NgramsId
394 -- | OutGroup NgramsId NgramsId
395 -- | SetListType NgramsId ListType
397 data PatchSet a = PatchSet
401 deriving (Eq, Ord, Show, Generic)
403 makeLenses ''PatchSet
404 makePrisms ''PatchSet
406 instance ToJSON a => ToJSON (PatchSet a) where
407 toJSON = genericToJSON $ unPrefix "_"
408 toEncoding = genericToEncoding $ unPrefix "_"
410 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
411 parseJSON = genericParseJSON $ unPrefix "_"
414 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
415 arbitrary = PatchSet <$> arbitrary <*> arbitrary
417 type instance Patched (PatchSet a) = Set a
419 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
420 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
422 instance Ord a => Semigroup (PatchSet a) where
423 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
424 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
427 instance Ord a => Monoid (PatchSet a) where
428 mempty = PatchSet mempty mempty
430 instance Ord a => Group (PatchSet a) where
431 invert (PatchSet r a) = PatchSet a r
433 instance Ord a => Composable (PatchSet a) where
434 composable _ _ = undefined
436 instance Ord a => Action (PatchSet a) (Set a) where
437 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
439 instance Applicable (PatchSet a) (Set a) where
440 applicable _ _ = mempty
442 instance Ord a => Validity (PatchSet a) where
443 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
445 instance Ord a => Transformable (PatchSet a) where
446 transformable = undefined
448 conflicts _p _q = undefined
450 transformWith conflict p q = undefined conflict p q
452 instance ToSchema a => ToSchema (PatchSet a)
455 type AddRem = Replace (Maybe ())
457 remPatch, addPatch :: AddRem
458 remPatch = replace (Just ()) Nothing
459 addPatch = replace Nothing (Just ())
461 isRem :: Replace (Maybe ()) -> Bool
462 isRem = (== remPatch)
464 type PatchMap = PM.PatchMap
466 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
467 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
468 Transformable, Composable)
470 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
471 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
473 -- TODO this breaks module abstraction
474 makePrisms ''PM.PatchMap
476 makePrisms ''PatchMSet
478 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
479 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
481 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
482 f = Map.partition isRem >>> both %~ Map.keysSet
484 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
485 g (rems, adds) = Map.fromSet (const remPatch) rems
486 <> Map.fromSet (const addPatch) adds
488 instance Ord a => Action (PatchMSet a) (MSet a) where
489 act (PatchMSet p) (MSet m) = MSet $ act p m
491 instance Ord a => Applicable (PatchMSet a) (MSet a) where
492 applicable (PatchMSet p) (MSet m) = applicable p m
494 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
495 toJSON = toJSON . view _PatchMSetIso
496 toEncoding = toEncoding . view _PatchMSetIso
498 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
499 parseJSON = fmap (_PatchMSetIso #) . parseJSON
501 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
502 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
504 instance ToSchema a => ToSchema (PatchMSet a) where
506 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
508 type instance Patched (PatchMSet a) = MSet a
510 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
511 arbitrary = uncurry replace <$> arbitrary
512 -- If they happen to be equal then the patch is Keep.
514 instance ToSchema a => ToSchema (Replace a) where
515 declareNamedSchema (_ :: Proxy (Replace a)) = do
516 -- TODO Keep constructor is not supported here.
517 aSchema <- declareSchemaRef (Proxy :: Proxy a)
518 return $ NamedSchema (Just "Replace") $ mempty
519 & type_ ?~ SwaggerObject
521 InsOrdHashMap.fromList
525 & required .~ [ "old", "new" ]
528 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
529 , _patch_list :: Replace ListType -- TODO Map UserId ListType
531 deriving (Eq, Show, Generic)
533 deriveJSON (unPrefix "_") ''NgramsPatch
534 makeLenses ''NgramsPatch
536 instance ToSchema NgramsPatch where
537 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
539 instance Arbitrary NgramsPatch where
540 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
542 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
544 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
545 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
547 instance Semigroup NgramsPatch where
548 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
550 instance Monoid NgramsPatch where
551 mempty = _NgramsPatch # mempty
553 instance Validity NgramsPatch where
554 validate p = p ^. _NgramsPatch . to validate
556 instance Transformable NgramsPatch where
557 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
559 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
561 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
563 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
565 type ConflictResolutionNgramsPatch =
566 ( ConflictResolutionPatchMSet NgramsTerm
567 , ConflictResolutionReplace ListType
569 type instance ConflictResolution NgramsPatch =
570 ConflictResolutionNgramsPatch
572 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
573 -- ~ Patched NgramsPatchIso
574 type instance Patched NgramsPatch = PatchedNgramsPatch
576 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
577 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
578 applicable p (Just nre) =
579 applicable (p ^. patch_children) (nre ^. nre_children) <>
580 applicable (p ^. patch_list) (nre ^. nre_list)
582 instance Action NgramsPatch NgramsRepoElement where
583 act p = (nre_children %~ act (p ^. patch_children))
584 . (nre_list %~ act (p ^. patch_list))
586 instance Action NgramsPatch (Maybe NgramsRepoElement) where
589 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
590 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
592 instance FromField NgramsTablePatch
594 fromField = fromField'
596 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
598 fromField = fromField'
600 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
602 type instance ConflictResolution NgramsTablePatch =
603 NgramsTerm -> ConflictResolutionNgramsPatch
605 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
606 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
607 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
609 makePrisms ''NgramsTablePatch
610 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
611 instance ToSchema NgramsTablePatch
613 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
614 applicable p = applicable (p ^. _NgramsTablePatch)
616 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
618 fmap (execState (reParentNgramsTablePatch p)) .
619 act (p ^. _NgramsTablePatch)
621 instance Arbitrary NgramsTablePatch where
622 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
624 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
625 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
626 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
628 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
630 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
631 reRootChildren root ngram = do
632 nre <- use $ at ngram
633 forOf_ (_Just . nre_children . folded) nre $ \child -> do
634 at child . _Just . nre_root ?= root
635 reRootChildren root child
637 reParent :: Maybe RootParent -> ReParent NgramsTerm
638 reParent rp child = do
639 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
640 . (nre_root .~ (_rp_root <$> rp))
642 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
644 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
645 reParentAddRem rp child p =
646 reParent (if isRem p then Nothing else Just rp) child
648 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
649 reParentNgramsPatch parent ngramsPatch = do
650 root_of_parent <- use (at parent . _Just . nre_root)
652 root = fromMaybe parent root_of_parent
653 rp = RootParent { _rp_root = root, _rp_parent = parent }
654 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
655 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
657 reParentNgramsTablePatch :: ReParent NgramsTablePatch
658 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
659 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
661 ------------------------------------------------------------------------
662 ------------------------------------------------------------------------
665 data Versioned a = Versioned
666 { _v_version :: Version
669 deriving (Generic, Show, Eq)
670 deriveJSON (unPrefix "_v_") ''Versioned
671 makeLenses ''Versioned
672 instance ToSchema a => ToSchema (Versioned a) where
673 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
674 instance Arbitrary a => Arbitrary (Versioned a) where
675 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
679 -- TODO sequences of modifications (Patchs)
680 type NgramsIdPatch = Patch NgramsId NgramsPatch
682 ngramsPatch :: Int -> NgramsPatch
683 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
685 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
686 toEdit n p = Edit n p
687 ngramsIdPatch :: Patch NgramsId NgramsPatch
688 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
689 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
690 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
693 -- applyPatchBack :: Patch -> IO Patch
694 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
696 ------------------------------------------------------------------------
697 ------------------------------------------------------------------------
698 ------------------------------------------------------------------------
701 -- TODO: Replace.old is ignored which means that if the current list
702 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
703 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
704 -- However this should not happen in non conflicting situations.
705 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
706 mkListsUpdate nt patches =
707 [ (ngramsTypeId nt, ng, listTypeId lt)
708 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
709 , lt <- patch ^.. patch_list . new
712 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
715 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
716 mkChildrenGroups addOrRem nt patches =
717 [ (ngramsTypeId nt, parent, child)
718 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
719 , child <- patch ^.. patch_children . to addOrRem . folded
723 ngramsTypeFromTabType :: TabType -> NgramsType
724 ngramsTypeFromTabType tabType =
725 let lieu = "Garg.API.Ngrams: " :: Text in
727 Sources -> Ngrams.Sources
728 Authors -> Ngrams.Authors
729 Institutes -> Ngrams.Institutes
730 Terms -> Ngrams.NgramsTerms
731 _ -> panic $ lieu <> "No Ngrams for this tab"
732 -- TODO: This `panic` would disapear with custom NgramsType.
734 ------------------------------------------------------------------------
736 { _r_version :: Version
739 -- first patch in the list is the most recent
743 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
744 parseJSON = genericParseJSON $ unPrefix "_r_"
746 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
747 toJSON = genericToJSON $ unPrefix "_r_"
748 toEncoding = genericToEncoding $ unPrefix "_r_"
752 initRepo :: Monoid s => Repo s p
753 initRepo = Repo 1 mempty []
755 type NgramsRepo = Repo NgramsState NgramsStatePatch
756 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
757 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
759 initMockRepo :: NgramsRepo
760 initMockRepo = Repo 1 s []
762 s = Map.singleton Ngrams.NgramsTerms
763 $ Map.singleton 47254
765 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
767 data RepoEnv = RepoEnv
768 { _renv_var :: !(MVar NgramsRepo)
769 , _renv_saver :: !(IO ())
770 , _renv_lock :: !FileLock
776 class HasRepoVar env where
777 repoVar :: Getter env (MVar NgramsRepo)
779 instance HasRepoVar (MVar NgramsRepo) where
782 class HasRepoSaver env where
783 repoSaver :: Getter env (IO ())
785 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
786 repoEnv :: Getter env RepoEnv
788 instance HasRepo RepoEnv where
791 instance HasRepoVar RepoEnv where
794 instance HasRepoSaver RepoEnv where
795 repoSaver = renv_saver
797 type RepoCmdM env err m =
800 , MonadIO m -- TODO liftIO -> liftBase
801 , MonadBaseControl IO m
804 ------------------------------------------------------------------------
806 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
808 saveRepo = liftIO =<< view repoSaver
810 listTypeConflictResolution :: ListType -> ListType -> ListType
811 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
813 ngramsStatePatchConflictResolution
814 :: NgramsType -> NodeId -> NgramsTerm
815 -> ConflictResolutionNgramsPatch
816 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
818 -- undefined {- TODO think this through -}, listTypeConflictResolution)
821 -- Insertions are not considered as patches,
822 -- they do not extend history,
823 -- they do not bump version.
824 insertNewOnly :: a -> Maybe b -> a
825 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
826 -- TODO error handling
828 something :: Monoid a => Maybe a -> a
829 something Nothing = mempty
830 something (Just a) = a
833 -- TODO refactor with putListNgrams
834 copyListNgrams :: RepoCmdM env err m
835 => NodeId -> NodeId -> NgramsType
837 copyListNgrams srcListId dstListId ngramsType = do
839 liftIO $ modifyMVar_ var $
840 pure . (r_state . at ngramsType %~ (Just . f . something))
843 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
844 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
846 -- TODO refactor with putListNgrams
847 -- The list must be non-empty!
848 -- The added ngrams must be non-existent!
849 addListNgrams :: RepoCmdM env err m
850 => NodeId -> NgramsType
851 -> [NgramsElement] -> m ()
852 addListNgrams listId ngramsType nes = do
854 liftIO $ modifyMVar_ var $
855 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
858 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
861 rmListNgrams :: RepoCmdM env err m
865 rmListNgrams l nt = setListNgrams l nt mempty
867 -- | TODO: incr the Version number
868 -- && should use patch
869 setListNgrams :: RepoCmdM env err m
872 -> Map NgramsTerm NgramsRepoElement
874 setListNgrams listId ngramsType ns = do
876 liftIO $ modifyMVar_ var $
880 (at listId .~ ( Just ns))
887 -- If the given list of ngrams elements contains ngrams already in
888 -- the repo, they will be ignored.
889 putListNgrams :: RepoCmdM env err m
890 => NodeId -> NgramsType
891 -> [NgramsElement] -> m ()
892 putListNgrams _ _ [] = pure ()
893 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
895 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
897 putListNgrams' :: RepoCmdM env err m
898 => ListId -> NgramsType
899 -> Map NgramsTerm NgramsRepoElement
901 putListNgrams' listId ngramsType ns = do
902 -- printDebug "putListNgrams" (length nes)
904 liftIO $ modifyMVar_ var $
921 tableNgramsPost :: RepoCmdM env err m
925 -> [NgramsTerm] -> m ()
926 tableNgramsPost tabType listId mayList =
927 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
929 currentVersion :: RepoCmdM env err m
933 r <- liftIO $ readMVar var
934 pure $ r ^. r_version
936 tableNgramsPull :: RepoCmdM env err m
937 => ListId -> NgramsType
939 -> m (Versioned NgramsTablePatch)
940 tableNgramsPull listId ngramsType p_version = do
942 r <- liftIO $ readMVar var
945 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
946 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
948 pure (Versioned (r ^. r_version) q_table)
950 -- Apply the given patch to the DB and returns the patch to be applied on the
953 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
955 -> Versioned NgramsTablePatch
956 -> m (Versioned NgramsTablePatch)
957 tableNgramsPut tabType listId (Versioned p_version p_table)
958 | p_table == mempty = do
959 let ngramsType = ngramsTypeFromTabType tabType
960 tableNgramsPull listId ngramsType p_version
963 let ngramsType = ngramsTypeFromTabType tabType
964 (p0, p0_validity) = PM.singleton listId p_table
965 (p, p_validity) = PM.singleton ngramsType p0
967 assertValid p0_validity
968 assertValid p_validity
971 vq' <- liftIO $ modifyMVar var $ \r -> do
973 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
974 (p', q') = transformWith ngramsStatePatchConflictResolution p q
975 r' = r & r_version +~ 1
977 & r_history %~ (p' :)
978 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
980 -- Ideally we would like to check these properties. However:
981 -- * They should be checked only to debug the code. The client data
982 -- should be able to trigger these.
983 -- * What kind of error should they throw (we are in IO here)?
984 -- * Should we keep modifyMVar?
985 -- * Should we throw the validation in an Exception, catch it around
986 -- modifyMVar and throw it back as an Error?
987 assertValid $ transformable p q
988 assertValid $ applicable p' (r ^. r_state)
990 pure (r', Versioned (r' ^. r_version) q'_table)
995 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
996 mergeNgramsElement _neOld neNew = neNew
998 { _ne_list :: ListType
999 If we merge the parents/children we can potentially create cycles!
1000 , _ne_parent :: Maybe NgramsTerm
1001 , _ne_children :: MSet NgramsTerm
1005 getNgramsTableMap :: RepoCmdM env err m
1008 -> m (Versioned NgramsTableMap)
1009 getNgramsTableMap nodeId ngramsType = do
1011 repo <- liftIO $ readMVar v
1012 pure $ Versioned (repo ^. r_version)
1013 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1018 -- | TODO Errors management
1019 -- TODO: polymorphic for Annuaire or Corpus or ...
1020 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1021 -- TODO: should take only one ListId
1023 getTime' :: MonadIO m => m TimeSpec
1024 getTime' = liftIO $ getTime ProcessCPUTime
1027 getTableNgrams :: forall env err m.
1028 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1029 => NodeType -> NodeId -> TabType
1030 -> ListId -> Limit -> Maybe Offset
1032 -> Maybe MinSize -> Maybe MaxSize
1034 -> (NgramsTerm -> Bool)
1035 -> m (Versioned NgramsTable)
1036 getTableNgrams _nType nId tabType listId limit_ offset
1037 listType minSize maxSize orderBy searchQuery = do
1040 -- lIds <- selectNodesWithUsername NodeList userMaster
1042 ngramsType = ngramsTypeFromTabType tabType
1043 offset' = maybe 0 identity offset
1044 listType' = maybe (const True) (==) listType
1045 minSize' = maybe (const True) (<=) minSize
1046 maxSize' = maybe (const True) (>=) maxSize
1048 selected_node n = minSize' s
1050 && searchQuery (n ^. ne_ngrams)
1051 && listType' (n ^. ne_list)
1055 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1057 ---------------------------------------
1058 sortOnOrder Nothing = identity
1059 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1060 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1061 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1062 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1064 ---------------------------------------
1065 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1066 selectAndPaginate tableMap = roots <> inners
1068 list = tableMap ^.. each
1069 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1071 selected_nodes = list & take limit_
1073 . filter selected_node
1074 . sortOnOrder orderBy
1075 roots = rootOf <$> selected_nodes
1076 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1077 inners = list & filter (selected_inner rootsSet)
1079 ---------------------------------------
1080 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1081 setScores False table = pure table
1082 setScores True table = do
1083 let ngrams_terms = (table ^.. each . ne_ngrams)
1085 occurrences <- getOccByNgramsOnlyFast' nId
1090 liftIO $ hprint stderr
1091 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1092 (length ngrams_terms) t1 t2
1094 occurrences <- getOccByNgramsOnlySlow nType nId
1100 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1102 pure $ table & each %~ setOcc
1103 ---------------------------------------
1105 -- lists <- catMaybes <$> listsWith userMaster
1106 -- trace (show lists) $
1107 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1109 let scoresNeeded = needsScores orderBy
1110 tableMap1 <- getNgramsTableMap listId ngramsType
1112 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1113 . Map.mapWithKey ngramsElementFromRepo
1115 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1116 . setScores (not scoresNeeded)
1119 liftIO $ hprint stderr
1120 ("getTableNgrams total=" % timeSpecs
1121 % " map1=" % timeSpecs
1122 % " map2=" % timeSpecs
1123 % " map3=" % timeSpecs
1124 % " sql=" % (if scoresNeeded then "map2" else "map3")
1126 ) t0 t3 t0 t1 t1 t2 t2 t3
1132 -- TODO: find a better place for the code above, All APIs stay here
1133 type QueryParamR = QueryParam' '[Required, Strict]
1135 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1136 deriving (Generic, Enum, Bounded, Read, Show)
1138 instance FromHttpApiData OrderBy
1140 parseUrlPiece "TermAsc" = pure TermAsc
1141 parseUrlPiece "TermDesc" = pure TermDesc
1142 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1143 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1144 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1147 instance ToParamSchema OrderBy
1148 instance FromJSON OrderBy
1149 instance ToJSON OrderBy
1150 instance ToSchema OrderBy
1151 instance Arbitrary OrderBy
1153 arbitrary = elements [minBound..maxBound]
1155 needsScores :: Maybe OrderBy -> Bool
1156 needsScores (Just ScoreAsc) = True
1157 needsScores (Just ScoreDesc) = True
1158 needsScores _ = False
1160 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1161 :> QueryParamR "ngramsType" TabType
1162 :> QueryParamR "list" ListId
1163 :> QueryParamR "limit" Limit
1164 :> QueryParam "offset" Offset
1165 :> QueryParam "listType" ListType
1166 :> QueryParam "minTermSize" MinSize
1167 :> QueryParam "maxTermSize" MaxSize
1168 :> QueryParam "orderBy" OrderBy
1169 :> QueryParam "search" Text
1170 :> Get '[JSON] (Versioned NgramsTable)
1172 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1173 :> QueryParamR "ngramsType" TabType
1174 :> QueryParamR "list" ListId
1175 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1176 :> Put '[JSON] (Versioned NgramsTablePatch)
1178 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1179 :> QueryParamR "ngramsType" TabType
1180 :> QueryParamR "list" ListId
1181 :> QueryParam "listType" ListType
1182 :> ReqBody '[JSON] [NgramsTerm]
1185 type TableNgramsApi = TableNgramsApiGet
1186 :<|> TableNgramsApiPut
1187 :<|> TableNgramsApiPost
1189 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1190 => NodeId -> TabType
1191 -> ListId -> Limit -> Maybe Offset
1193 -> Maybe MinSize -> Maybe MaxSize
1195 -> Maybe Text -- full text search
1196 -> m (Versioned NgramsTable)
1197 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1198 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1200 searchQuery = maybe (const True) isInfixOf mt
1202 -- | Text search is deactivated for now for ngrams by doc only
1203 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1205 -> ListId -> Limit -> Maybe Offset
1207 -> Maybe MinSize -> Maybe MaxSize
1209 -> Maybe Text -- full text search
1210 -> m (Versioned NgramsTable)
1211 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1212 ns <- selectNodesWithUsername NodeList userMaster
1213 let ngramsType = ngramsTypeFromTabType tabType
1214 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1215 let searchQuery = flip S.member (S.fromList ngs)
1216 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1220 apiNgramsTableCorpus :: ( RepoCmdM env err m
1222 , HasInvalidError err
1223 , HasConnectionPool env
1225 => NodeId -> ServerT TableNgramsApi m
1226 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1228 :<|> tableNgramsPost
1231 apiNgramsTableDoc :: ( RepoCmdM env err m
1233 , HasInvalidError err
1234 , HasConnectionPool env
1236 => DocId -> ServerT TableNgramsApi m
1237 apiNgramsTableDoc dId = getTableNgramsDoc dId
1239 :<|> tableNgramsPost
1240 -- > add new ngrams in database (TODO AD)
1241 -- > index all the corpus accordingly (TODO AD)
1243 listNgramsChangedSince :: RepoCmdM env err m
1244 => ListId -> NgramsType -> Version -> m (Versioned Bool)
1245 listNgramsChangedSince listId ngramsType version
1247 Versioned <$> currentVersion <*> pure True
1249 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1252 instance Arbitrary NgramsRepoElement where
1253 arbitrary = elements $ map ngramsElementToRepo ns
1255 NgramsTable ns = mockTable
1258 instance FromHttpApiData (Map NgramsType (Versioned NgramsTableMap))
1260 parseUrlPiece x = maybeToEither x (decode $ cs x)