1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
47 , apiNgramsTableCorpus
69 , NgramsRepoElement(..)
78 , ngramsTypeFromTabType
95 , listNgramsChangedSince
99 import Codec.Serialise (Serialise())
100 import Control.Category ((>>>))
101 import Control.Concurrent
102 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
103 import Control.Monad.Base (MonadBase, liftBase)
104 import Control.Monad.Error.Class (MonadError)
105 import Control.Monad.Reader
106 import Control.Monad.State
107 import Control.Monad.Trans.Control (MonadBaseControl)
108 import Data.Aeson hiding ((.=))
109 import Data.Aeson.TH (deriveJSON)
110 import Data.Either(Either(Left))
111 import Data.Either.Extra (maybeToEither)
113 import Data.Map.Strict (Map)
114 import Data.Maybe (fromMaybe)
116 import Data.Ord (Down(..))
117 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, ours)
118 import Data.Set (Set)
119 import Data.Swagger hiding (version, patch)
120 import Data.Text (Text, isInfixOf, count)
122 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
123 import Formatting (hprint, int, (%))
124 import Formatting.Clock (timeSpecs)
125 import GHC.Generics (Generic)
126 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
127 import Gargantext.Core.Types (TODO)
128 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
129 import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
130 import Gargantext.Database.Query.Table.Node.Select
131 import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
132 import Gargantext.Database.Admin.Config (userMaster)
133 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
134 import Gargantext.Database.Admin.Types.Node (NodeType(..))
135 import Gargantext.Database.Prelude (fromField', HasConnectionPool)
136 import Gargantext.Prelude
137 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
138 import Servant hiding (Patch)
139 import System.Clock (getTime, TimeSpec, Clock(..))
140 import System.FileLock (FileLock)
141 import System.IO (stderr)
142 import Test.QuickCheck (elements)
143 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
144 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
145 import qualified Data.List as List
146 import qualified Data.Map.Strict as Map
147 import qualified Data.Map.Strict.Patch as PM
148 import qualified Data.Set as S
149 import qualified Data.Set as Set
150 import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
152 ------------------------------------------------------------------------
153 --data FacetFormat = Table | Chart
154 data TabType = Docs | Trash | MoreFav | MoreTrash
155 | Terms | Sources | Authors | Institutes
157 deriving (Generic, Enum, Bounded, Show)
159 instance FromHttpApiData TabType
161 parseUrlPiece "Docs" = pure Docs
162 parseUrlPiece "Trash" = pure Trash
163 parseUrlPiece "MoreFav" = pure MoreFav
164 parseUrlPiece "MoreTrash" = pure MoreTrash
166 parseUrlPiece "Terms" = pure Terms
167 parseUrlPiece "Sources" = pure Sources
168 parseUrlPiece "Institutes" = pure Institutes
169 parseUrlPiece "Authors" = pure Authors
171 parseUrlPiece "Contacts" = pure Contacts
173 parseUrlPiece _ = Left "Unexpected value of TabType"
175 instance ToParamSchema TabType
176 instance ToJSON TabType
177 instance FromJSON TabType
178 instance ToSchema TabType
179 instance Arbitrary TabType
181 arbitrary = elements [minBound .. maxBound]
183 newtype MSet a = MSet (Map a ())
184 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
186 instance ToJSON a => ToJSON (MSet a) where
187 toJSON (MSet m) = toJSON (Map.keys m)
188 toEncoding (MSet m) = toEncoding (Map.keys m)
190 mSetFromSet :: Set a -> MSet a
191 mSetFromSet = MSet . Map.fromSet (const ())
193 mSetFromList :: Ord a => [a] -> MSet a
194 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
196 -- mSetToSet :: Ord a => MSet a -> Set a
197 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
198 mSetToSet :: Ord a => MSet a -> Set a
199 mSetToSet = Set.fromList . mSetToList
201 mSetToList :: MSet a -> [a]
202 mSetToList (MSet a) = Map.keys a
204 instance Foldable MSet where
205 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
207 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
208 parseJSON = fmap mSetFromList . parseJSON
210 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
212 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
214 ------------------------------------------------------------------------
215 type NgramsTerm = Text
217 data RootParent = RootParent
218 { _rp_root :: NgramsTerm
219 , _rp_parent :: NgramsTerm
221 deriving (Ord, Eq, Show, Generic)
223 deriveJSON (unPrefix "_rp_") ''RootParent
224 makeLenses ''RootParent
226 data NgramsRepoElement = NgramsRepoElement
228 , _nre_list :: ListType
229 --, _nre_root_parent :: Maybe RootParent
230 , _nre_root :: Maybe NgramsTerm
231 , _nre_parent :: Maybe NgramsTerm
232 , _nre_children :: MSet NgramsTerm
234 deriving (Ord, Eq, Show, Generic)
236 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
237 makeLenses ''NgramsRepoElement
239 instance ToSchema NgramsRepoElement where
240 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
242 instance Serialise (MSet NgramsTerm)
243 instance Serialise NgramsRepoElement
246 NgramsElement { _ne_ngrams :: NgramsTerm
248 , _ne_list :: ListType
249 , _ne_occurrences :: Int
250 , _ne_root :: Maybe NgramsTerm
251 , _ne_parent :: Maybe NgramsTerm
252 , _ne_children :: MSet NgramsTerm
254 deriving (Ord, Eq, Show, Generic)
256 deriveJSON (unPrefix "_ne_") ''NgramsElement
257 makeLenses ''NgramsElement
259 mkNgramsElement :: NgramsTerm
264 mkNgramsElement ngrams list rp children =
265 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
268 size = 1 + count " " ngrams
270 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
271 newNgramsElement mayList ngrams =
272 mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
274 instance ToSchema NgramsElement where
275 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
276 instance Arbitrary NgramsElement where
277 arbitrary = elements [newNgramsElement Nothing "sport"]
279 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
281 (NgramsElement { _ne_size = s
295 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
296 ngramsElementFromRepo
305 NgramsElement { _ne_size = s
310 , _ne_ngrams = ngrams
311 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
313 -- Here we could use 0 if we want to avoid any `panic`.
314 -- It will not happen using getTableNgrams if
315 -- getOccByNgramsOnly provides a count of occurrences for
316 -- all the ngrams given.
320 ------------------------------------------------------------------------
321 newtype NgramsTable = NgramsTable [NgramsElement]
322 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
324 type NgramsList = NgramsTable
326 makePrisms ''NgramsTable
328 -- | Question: why these repetition of Type in this instance
329 -- may you document it please ?
330 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
331 each = _NgramsTable . each
334 -- | TODO Check N and Weight
336 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
337 toNgramsElement ns = map toNgramsElement' ns
339 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
343 Just x -> lookup x mapParent
344 c' = maybe mempty identity $ lookup t mapChildren
345 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
347 mapParent :: Map Int Text
348 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
350 mapChildren :: Map Text (Set Text)
351 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
352 $ Map.fromListWith (<>)
353 $ map (first fromJust)
354 $ filter (isJust . fst)
355 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
358 mockTable :: NgramsTable
359 mockTable = NgramsTable
360 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
361 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
362 , mkNgramsElement "cats" StopTerm Nothing mempty
363 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
364 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
365 , mkNgramsElement "fox" GraphTerm Nothing mempty
366 , mkNgramsElement "object" CandidateTerm Nothing mempty
367 , mkNgramsElement "nothing" StopTerm Nothing mempty
368 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
369 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
370 , mkNgramsElement "moon" CandidateTerm Nothing mempty
371 , mkNgramsElement "sky" StopTerm Nothing mempty
374 rp n = Just $ RootParent n n
376 instance Arbitrary NgramsTable where
377 arbitrary = pure mockTable
379 instance ToSchema NgramsTable
381 ------------------------------------------------------------------------
382 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
383 ------------------------------------------------------------------------
384 -- On the Client side:
385 --data Action = InGroup NgramsId NgramsId
386 -- | OutGroup NgramsId NgramsId
387 -- | SetListType NgramsId ListType
389 data PatchSet a = PatchSet
393 deriving (Eq, Ord, Show, Generic)
395 makeLenses ''PatchSet
396 makePrisms ''PatchSet
398 instance ToJSON a => ToJSON (PatchSet a) where
399 toJSON = genericToJSON $ unPrefix "_"
400 toEncoding = genericToEncoding $ unPrefix "_"
402 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
403 parseJSON = genericParseJSON $ unPrefix "_"
406 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
407 arbitrary = PatchSet <$> arbitrary <*> arbitrary
409 type instance Patched (PatchSet a) = Set a
411 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
412 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
414 instance Ord a => Semigroup (PatchSet a) where
415 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
416 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
419 instance Ord a => Monoid (PatchSet a) where
420 mempty = PatchSet mempty mempty
422 instance Ord a => Group (PatchSet a) where
423 invert (PatchSet r a) = PatchSet a r
425 instance Ord a => Composable (PatchSet a) where
426 composable _ _ = undefined
428 instance Ord a => Action (PatchSet a) (Set a) where
429 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
431 instance Applicable (PatchSet a) (Set a) where
432 applicable _ _ = mempty
434 instance Ord a => Validity (PatchSet a) where
435 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
437 instance Ord a => Transformable (PatchSet a) where
438 transformable = undefined
440 conflicts _p _q = undefined
442 transformWith conflict p q = undefined conflict p q
444 instance ToSchema a => ToSchema (PatchSet a)
447 type AddRem = Replace (Maybe ())
449 instance Serialise AddRem
451 remPatch, addPatch :: AddRem
452 remPatch = replace (Just ()) Nothing
453 addPatch = replace Nothing (Just ())
455 isRem :: Replace (Maybe ()) -> Bool
456 isRem = (== remPatch)
458 type PatchMap = PM.PatchMap
461 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
462 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
463 Transformable, Composable)
465 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
466 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
468 instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
469 instance (Serialise a, Ord a) => Serialise (PatchMSet a)
471 -- TODO this breaks module abstraction
472 makePrisms ''PM.PatchMap
474 makePrisms ''PatchMSet
476 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
477 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
479 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
480 f = Map.partition isRem >>> both %~ Map.keysSet
482 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
483 g (rems, adds) = Map.fromSet (const remPatch) rems
484 <> Map.fromSet (const addPatch) adds
486 instance Ord a => Action (PatchMSet a) (MSet a) where
487 act (PatchMSet p) (MSet m) = MSet $ act p m
489 instance Ord a => Applicable (PatchMSet a) (MSet a) where
490 applicable (PatchMSet p) (MSet m) = applicable p m
492 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
493 toJSON = toJSON . view _PatchMSetIso
494 toEncoding = toEncoding . view _PatchMSetIso
496 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
497 parseJSON = fmap (_PatchMSetIso #) . parseJSON
499 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
500 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
502 instance ToSchema a => ToSchema (PatchMSet a) where
504 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
506 type instance Patched (PatchMSet a) = MSet a
508 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
509 arbitrary = uncurry replace <$> arbitrary
510 -- If they happen to be equal then the patch is Keep.
512 instance ToSchema a => ToSchema (Replace a) where
513 declareNamedSchema (_ :: Proxy (Replace a)) = do
514 -- TODO Keep constructor is not supported here.
515 aSchema <- declareSchemaRef (Proxy :: Proxy a)
516 return $ NamedSchema (Just "Replace") $ mempty
517 & type_ ?~ SwaggerObject
519 InsOrdHashMap.fromList
523 & required .~ [ "old", "new" ]
526 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
527 , _patch_list :: Replace ListType -- TODO Map UserId ListType
529 deriving (Eq, Show, Generic)
531 deriveJSON (unPrefix "_") ''NgramsPatch
532 makeLenses ''NgramsPatch
534 instance ToSchema NgramsPatch where
535 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
537 instance Arbitrary NgramsPatch where
538 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
540 instance Serialise NgramsPatch
541 instance Serialise (Replace ListType)
542 instance Serialise ListType
544 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
546 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
547 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
549 instance Semigroup NgramsPatch where
550 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
552 instance Monoid NgramsPatch where
553 mempty = _NgramsPatch # mempty
555 instance Validity NgramsPatch where
556 validate p = p ^. _NgramsPatch . to validate
558 instance Transformable NgramsPatch where
559 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
561 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
563 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
565 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
567 type ConflictResolutionNgramsPatch =
568 ( ConflictResolutionPatchMSet NgramsTerm
569 , ConflictResolutionReplace ListType
571 type instance ConflictResolution NgramsPatch =
572 ConflictResolutionNgramsPatch
574 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
575 -- ~ Patched NgramsPatchIso
576 type instance Patched NgramsPatch = PatchedNgramsPatch
578 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
579 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
580 applicable p (Just nre) =
581 applicable (p ^. patch_children) (nre ^. nre_children) <>
582 applicable (p ^. patch_list) (nre ^. nre_list)
584 instance Action NgramsPatch NgramsRepoElement where
585 act p = (nre_children %~ act (p ^. patch_children))
586 . (nre_list %~ act (p ^. patch_list))
588 instance Action NgramsPatch (Maybe NgramsRepoElement) where
591 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
592 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
594 instance Serialise NgramsTablePatch
595 instance Serialise (PatchMap NgramsTerm NgramsPatch)
597 instance FromField NgramsTablePatch
599 fromField = fromField'
601 instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
603 fromField = fromField'
605 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
607 type instance ConflictResolution NgramsTablePatch =
608 NgramsTerm -> ConflictResolutionNgramsPatch
610 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
611 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
612 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
614 makePrisms ''NgramsTablePatch
615 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
616 instance ToSchema NgramsTablePatch
618 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
619 applicable p = applicable (p ^. _NgramsTablePatch)
621 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
623 fmap (execState (reParentNgramsTablePatch p)) .
624 act (p ^. _NgramsTablePatch)
626 instance Arbitrary NgramsTablePatch where
627 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
629 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
630 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
631 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
633 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
635 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
636 reRootChildren root ngram = do
637 nre <- use $ at ngram
638 forOf_ (_Just . nre_children . folded) nre $ \child -> do
639 at child . _Just . nre_root ?= root
640 reRootChildren root child
642 reParent :: Maybe RootParent -> ReParent NgramsTerm
643 reParent rp child = do
644 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
645 . (nre_root .~ (_rp_root <$> rp))
647 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
649 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
650 reParentAddRem rp child p =
651 reParent (if isRem p then Nothing else Just rp) child
653 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
654 reParentNgramsPatch parent ngramsPatch = do
655 root_of_parent <- use (at parent . _Just . nre_root)
657 root = fromMaybe parent root_of_parent
658 rp = RootParent { _rp_root = root, _rp_parent = parent }
659 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
660 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
662 reParentNgramsTablePatch :: ReParent NgramsTablePatch
663 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
664 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
666 ------------------------------------------------------------------------
667 ------------------------------------------------------------------------
670 data Versioned a = Versioned
671 { _v_version :: Version
674 deriving (Generic, Show, Eq)
675 deriveJSON (unPrefix "_v_") ''Versioned
676 makeLenses ''Versioned
677 instance ToSchema a => ToSchema (Versioned a) where
678 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
679 instance Arbitrary a => Arbitrary (Versioned a) where
680 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
684 -- TODO sequences of modifications (Patchs)
685 type NgramsIdPatch = Patch NgramsId NgramsPatch
687 ngramsPatch :: Int -> NgramsPatch
688 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
690 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
691 toEdit n p = Edit n p
692 ngramsIdPatch :: Patch NgramsId NgramsPatch
693 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
694 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
695 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
698 -- applyPatchBack :: Patch -> IO Patch
699 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
701 ------------------------------------------------------------------------
702 ------------------------------------------------------------------------
703 ------------------------------------------------------------------------
706 -- TODO: Replace.old is ignored which means that if the current list
707 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
708 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
709 -- However this should not happen in non conflicting situations.
710 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
711 mkListsUpdate nt patches =
712 [ (ngramsTypeId nt, ng, listTypeId lt)
713 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
714 , lt <- patch ^.. patch_list . new
717 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
720 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
721 mkChildrenGroups addOrRem nt patches =
722 [ (ngramsTypeId nt, parent, child)
723 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
724 , child <- patch ^.. patch_children . to addOrRem . folded
728 ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
729 ngramsTypeFromTabType tabType =
730 let lieu = "Garg.API.Ngrams: " :: Text in
732 Sources -> TableNgrams.Sources
733 Authors -> TableNgrams.Authors
734 Institutes -> TableNgrams.Institutes
735 Terms -> TableNgrams.NgramsTerms
736 _ -> panic $ lieu <> "No Ngrams for this tab"
737 -- TODO: This `panic` would disapear with custom NgramsType.
739 ------------------------------------------------------------------------
741 { _r_version :: Version
744 -- first patch in the list is the most recent
748 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
749 parseJSON = genericParseJSON $ unPrefix "_r_"
751 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
752 toJSON = genericToJSON $ unPrefix "_r_"
753 toEncoding = genericToEncoding $ unPrefix "_r_"
755 instance (Serialise s, Serialise p) => Serialise (Repo s p)
759 initRepo :: Monoid s => Repo s p
760 initRepo = Repo 1 mempty []
762 type NgramsRepo = Repo NgramsState NgramsStatePatch
763 type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
764 type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
766 instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
767 instance Serialise NgramsStatePatch
769 initMockRepo :: NgramsRepo
770 initMockRepo = Repo 1 s []
772 s = Map.singleton TableNgrams.NgramsTerms
773 $ Map.singleton 47254
775 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
777 data RepoEnv = RepoEnv
778 { _renv_var :: !(MVar NgramsRepo)
779 , _renv_saver :: !(IO ())
780 , _renv_lock :: !FileLock
786 class HasRepoVar env where
787 repoVar :: Getter env (MVar NgramsRepo)
789 instance HasRepoVar (MVar NgramsRepo) where
792 class HasRepoSaver env where
793 repoSaver :: Getter env (IO ())
795 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
796 repoEnv :: Getter env RepoEnv
798 instance HasRepo RepoEnv where
801 instance HasRepoVar RepoEnv where
804 instance HasRepoSaver RepoEnv where
805 repoSaver = renv_saver
807 type RepoCmdM env err m =
810 , MonadBaseControl IO m
813 ------------------------------------------------------------------------
815 saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
817 saveRepo = liftBase =<< view repoSaver
819 listTypeConflictResolution :: ListType -> ListType -> ListType
820 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
822 ngramsStatePatchConflictResolution
823 :: TableNgrams.NgramsType
826 -> ConflictResolutionNgramsPatch
827 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
829 -- undefined {- TODO think this through -}, listTypeConflictResolution)
832 -- Insertions are not considered as patches,
833 -- they do not extend history,
834 -- they do not bump version.
835 insertNewOnly :: a -> Maybe b -> a
836 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
837 -- TODO error handling
839 something :: Monoid a => Maybe a -> a
840 something Nothing = mempty
841 something (Just a) = a
844 -- TODO refactor with putListNgrams
845 copyListNgrams :: RepoCmdM env err m
846 => NodeId -> NodeId -> NgramsType
848 copyListNgrams srcListId dstListId ngramsType = do
850 liftBase $ modifyMVar_ var $
851 pure . (r_state . at ngramsType %~ (Just . f . something))
854 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
855 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
857 -- TODO refactor with putListNgrams
858 -- The list must be non-empty!
859 -- The added ngrams must be non-existent!
860 addListNgrams :: RepoCmdM env err m
861 => NodeId -> NgramsType
862 -> [NgramsElement] -> m ()
863 addListNgrams listId ngramsType nes = do
865 liftBase $ modifyMVar_ var $
866 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
869 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
872 rmListNgrams :: RepoCmdM env err m
874 -> TableNgrams.NgramsType
876 rmListNgrams l nt = setListNgrams l nt mempty
878 -- | TODO: incr the Version number
879 -- && should use patch
880 setListNgrams :: RepoCmdM env err m
882 -> TableNgrams.NgramsType
883 -> Map NgramsTerm NgramsRepoElement
885 setListNgrams listId ngramsType ns = do
887 liftBase $ modifyMVar_ var $
891 (at listId .~ ( Just ns))
898 -- If the given list of ngrams elements contains ngrams already in
899 -- the repo, they will be ignored.
900 putListNgrams :: RepoCmdM env err m
902 -> TableNgrams.NgramsType
903 -> [NgramsElement] -> m ()
904 putListNgrams _ _ [] = pure ()
905 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
907 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
909 putListNgrams' :: RepoCmdM env err m
911 -> TableNgrams.NgramsType
912 -> Map NgramsTerm NgramsRepoElement
914 putListNgrams' listId ngramsType ns = do
915 -- printDebug "putListNgrams" (length nes)
917 liftBase $ modifyMVar_ var $
934 tableNgramsPost :: RepoCmdM env err m
938 -> [NgramsTerm] -> m ()
939 tableNgramsPost tabType listId mayList =
940 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
942 currentVersion :: RepoCmdM env err m
946 r <- liftBase $ readMVar var
947 pure $ r ^. r_version
949 tableNgramsPull :: RepoCmdM env err m
951 -> TableNgrams.NgramsType
953 -> m (Versioned NgramsTablePatch)
954 tableNgramsPull listId ngramsType p_version = do
956 r <- liftBase $ readMVar var
959 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
960 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
962 pure (Versioned (r ^. r_version) q_table)
964 -- Apply the given patch to the DB and returns the patch to be applied on the
967 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
969 -> Versioned NgramsTablePatch
970 -> m (Versioned NgramsTablePatch)
971 tableNgramsPut tabType listId (Versioned p_version p_table)
972 | p_table == mempty = do
973 let ngramsType = ngramsTypeFromTabType tabType
974 tableNgramsPull listId ngramsType p_version
977 let ngramsType = ngramsTypeFromTabType tabType
978 (p0, p0_validity) = PM.singleton listId p_table
979 (p, p_validity) = PM.singleton ngramsType p0
981 assertValid p0_validity
982 assertValid p_validity
985 vq' <- liftBase $ modifyMVar var $ \r -> do
987 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
988 (p', q') = transformWith ngramsStatePatchConflictResolution p q
989 r' = r & r_version +~ 1
991 & r_history %~ (p' :)
992 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
994 -- Ideally we would like to check these properties. However:
995 -- * They should be checked only to debug the code. The client data
996 -- should be able to trigger these.
997 -- * What kind of error should they throw (we are in IO here)?
998 -- * Should we keep modifyMVar?
999 -- * Should we throw the validation in an Exception, catch it around
1000 -- modifyMVar and throw it back as an Error?
1001 assertValid $ transformable p q
1002 assertValid $ applicable p' (r ^. r_state)
1004 pure (r', Versioned (r' ^. r_version) q'_table)
1009 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
1010 mergeNgramsElement _neOld neNew = neNew
1012 { _ne_list :: ListType
1013 If we merge the parents/children we can potentially create cycles!
1014 , _ne_parent :: Maybe NgramsTerm
1015 , _ne_children :: MSet NgramsTerm
1019 getNgramsTableMap :: RepoCmdM env err m
1021 -> TableNgrams.NgramsType
1022 -> m (Versioned NgramsTableMap)
1023 getNgramsTableMap nodeId ngramsType = do
1025 repo <- liftBase $ readMVar v
1026 pure $ Versioned (repo ^. r_version)
1027 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1032 -- | TODO Errors management
1033 -- TODO: polymorphic for Annuaire or Corpus or ...
1034 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1035 -- TODO: should take only one ListId
1037 getTime' :: MonadBase IO m => m TimeSpec
1038 getTime' = liftBase $ getTime ProcessCPUTime
1041 getTableNgrams :: forall env err m.
1042 (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1043 => NodeType -> NodeId -> TabType
1044 -> ListId -> Limit -> Maybe Offset
1046 -> Maybe MinSize -> Maybe MaxSize
1048 -> (NgramsTerm -> Bool)
1049 -> m (Versioned NgramsTable)
1050 getTableNgrams _nType nId tabType listId limit_ offset
1051 listType minSize maxSize orderBy searchQuery = do
1054 -- lIds <- selectNodesWithUsername NodeList userMaster
1056 ngramsType = ngramsTypeFromTabType tabType
1057 offset' = maybe 0 identity offset
1058 listType' = maybe (const True) (==) listType
1059 minSize' = maybe (const True) (<=) minSize
1060 maxSize' = maybe (const True) (>=) maxSize
1062 selected_node n = minSize' s
1064 && searchQuery (n ^. ne_ngrams)
1065 && listType' (n ^. ne_list)
1069 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1071 ---------------------------------------
1072 sortOnOrder Nothing = identity
1073 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1074 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1075 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1076 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1078 ---------------------------------------
1079 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1080 selectAndPaginate tableMap = roots <> inners
1082 list = tableMap ^.. each
1083 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1085 selected_nodes = list & take limit_
1087 . filter selected_node
1088 . sortOnOrder orderBy
1089 roots = rootOf <$> selected_nodes
1090 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1091 inners = list & filter (selected_inner rootsSet)
1093 ---------------------------------------
1094 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1095 setScores False table = pure table
1096 setScores True table = do
1097 let ngrams_terms = (table ^.. each . ne_ngrams)
1099 occurrences <- getOccByNgramsOnlyFast' nId
1104 liftBase $ hprint stderr
1105 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1106 (length ngrams_terms) t1 t2
1108 occurrences <- getOccByNgramsOnlySlow nType nId
1114 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1116 pure $ table & each %~ setOcc
1117 ---------------------------------------
1119 -- lists <- catMaybes <$> listsWith userMaster
1120 -- trace (show lists) $
1121 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1123 let scoresNeeded = needsScores orderBy
1124 tableMap1 <- getNgramsTableMap listId ngramsType
1126 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1127 . Map.mapWithKey ngramsElementFromRepo
1129 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1130 . setScores (not scoresNeeded)
1133 liftBase $ hprint stderr
1134 ("getTableNgrams total=" % timeSpecs
1135 % " map1=" % timeSpecs
1136 % " map2=" % timeSpecs
1137 % " map3=" % timeSpecs
1138 % " sql=" % (if scoresNeeded then "map2" else "map3")
1140 ) t0 t3 t0 t1 t1 t2 t2 t3
1144 scoresRecomputeTableNgrams :: forall env err m. (RepoCmdM env err m, HasNodeError err, HasConnectionPool env) => NodeId -> TabType -> ListId -> m Int
1145 scoresRecomputeTableNgrams nId tabType listId = do
1146 tableMap <- getNgramsTableMap listId ngramsType
1147 _ <- tableMap & v_data %%~ setScores
1148 . Map.mapWithKey ngramsElementFromRepo
1152 ngramsType = ngramsTypeFromTabType tabType
1154 setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
1155 setScores table = do
1156 let ngrams_terms = (table ^.. each . ne_ngrams)
1157 occurrences <- getOccByNgramsOnlyFast' nId
1162 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1164 pure $ table & each %~ setOcc
1170 -- TODO: find a better place for the code above, All APIs stay here
1171 type QueryParamR = QueryParam' '[Required, Strict]
1173 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1174 deriving (Generic, Enum, Bounded, Read, Show)
1176 instance FromHttpApiData OrderBy
1178 parseUrlPiece "TermAsc" = pure TermAsc
1179 parseUrlPiece "TermDesc" = pure TermDesc
1180 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1181 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1182 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1185 instance ToParamSchema OrderBy
1186 instance FromJSON OrderBy
1187 instance ToJSON OrderBy
1188 instance ToSchema OrderBy
1189 instance Arbitrary OrderBy
1191 arbitrary = elements [minBound..maxBound]
1193 needsScores :: Maybe OrderBy -> Bool
1194 needsScores (Just ScoreAsc) = True
1195 needsScores (Just ScoreDesc) = True
1196 needsScores _ = False
1198 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1199 :> QueryParamR "ngramsType" TabType
1200 :> QueryParamR "list" ListId
1201 :> QueryParamR "limit" Limit
1202 :> QueryParam "offset" Offset
1203 :> QueryParam "listType" ListType
1204 :> QueryParam "minTermSize" MinSize
1205 :> QueryParam "maxTermSize" MaxSize
1206 :> QueryParam "orderBy" OrderBy
1207 :> QueryParam "search" Text
1208 :> Get '[JSON] (Versioned NgramsTable)
1210 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1211 :> QueryParamR "ngramsType" TabType
1212 :> QueryParamR "list" ListId
1213 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1214 :> Put '[JSON] (Versioned NgramsTablePatch)
1216 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1217 :> QueryParamR "ngramsType" TabType
1218 :> QueryParamR "list" ListId
1219 :> QueryParam "listType" ListType
1220 :> ReqBody '[JSON] [NgramsTerm]
1223 type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
1224 :> QueryParamR "ngramsType" TabType
1225 :> QueryParamR "list" ListId
1226 :> "recompute" :> Post '[JSON] Int
1228 type TableNgramsApi = TableNgramsApiGet
1229 :<|> TableNgramsApiPut
1230 :<|> TableNgramsApiPost
1231 :<|> RecomputeScoresNgramsApiGet
1233 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1234 => NodeId -> TabType
1235 -> ListId -> Limit -> Maybe Offset
1237 -> Maybe MinSize -> Maybe MaxSize
1239 -> Maybe Text -- full text search
1240 -> m (Versioned NgramsTable)
1241 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1242 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1244 searchQuery = maybe (const True) isInfixOf mt
1246 -- | Text search is deactivated for now for ngrams by doc only
1247 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env)
1249 -> ListId -> Limit -> Maybe Offset
1251 -> Maybe MinSize -> Maybe MaxSize
1253 -> Maybe Text -- full text search
1254 -> m (Versioned NgramsTable)
1255 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1256 ns <- selectNodesWithUsername NodeList userMaster
1257 let ngramsType = ngramsTypeFromTabType tabType
1258 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1259 let searchQuery = flip S.member (S.fromList ngs)
1260 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1264 apiNgramsTableCorpus :: ( RepoCmdM env err m
1266 , HasInvalidError err
1267 , HasConnectionPool env
1269 => NodeId -> ServerT TableNgramsApi m
1270 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1272 :<|> tableNgramsPost
1273 :<|> scoresRecomputeTableNgrams cId
1275 apiNgramsTableDoc :: ( RepoCmdM env err m
1277 , HasInvalidError err
1278 , HasConnectionPool env
1280 => DocId -> ServerT TableNgramsApi m
1281 apiNgramsTableDoc dId = getTableNgramsDoc dId
1283 :<|> tableNgramsPost
1284 :<|> scoresRecomputeTableNgrams dId
1285 -- > add new ngrams in database (TODO AD)
1286 -- > index all the corpus accordingly (TODO AD)
1288 listNgramsChangedSince :: RepoCmdM env err m
1289 => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
1290 listNgramsChangedSince listId ngramsType version
1292 Versioned <$> currentVersion <*> pure True
1294 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1297 instance Arbitrary NgramsRepoElement where
1298 arbitrary = elements $ map ngramsElementToRepo ns
1300 NgramsTable ns = mockTable
1303 instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
1305 parseUrlPiece x = maybeToEither x (decode $ cs x)