1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
47 , apiNgramsTableCorpus
69 , NgramsRepoElement(..)
78 , ngramsTypeFromTabType
95 , listNgramsChangedSince
99 -- import Debug.Trace (trace)
100 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
101 -- import Gargantext.Database.Schema.User (UserId)
102 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
103 Composable(..), Transformable(..),
104 PairPatch(..), Patched, ConflictResolution,
105 ConflictResolutionReplace, ours)
106 import qualified Data.Map.Strict.Patch as PM
108 import Data.Ord (Down(..))
110 --import Data.Semigroup
111 import Data.Set (Set)
112 import qualified Data.Set as S
113 import qualified Data.List as List
114 import Data.Maybe (fromMaybe)
115 -- import Data.Tuple.Extra (first)
116 import qualified Data.Map.Strict as Map
117 import Data.Map.Strict (Map)
118 import qualified Data.Set as Set
119 import Control.Category ((>>>))
120 import Control.Concurrent
121 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (.~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
122 import Control.Monad.Error.Class (MonadError)
123 import Control.Monad.Reader
124 import Control.Monad.State
125 import Data.Aeson hiding ((.=))
126 import Data.Aeson.TH (deriveJSON)
127 import Data.Either(Either(Left))
128 import Data.Either.Extra (maybeToEither)
129 -- import Data.Map (lookup)
130 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
131 import Data.Swagger hiding (version, patch)
132 import Data.Text (Text, isInfixOf, count)
134 import Formatting (hprint, int, (%))
135 import Formatting.Clock (timeSpecs)
136 import GHC.Generics (Generic)
137 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
138 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
139 import Gargantext.Database.Config (userMaster)
140 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
141 import Gargantext.Database.Schema.Ngrams (NgramsType)
142 import Gargantext.Database.Types.Node (NodeType(..))
143 import Gargantext.Database.Utils (fromField', HasConnection)
144 import Gargantext.Database.Node.Select
145 import Gargantext.Database.Ngrams
146 --import Gargantext.Database.Lists (listsWith)
147 import Gargantext.Database.Schema.Node (HasNodeError)
148 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
149 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
150 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
151 import Gargantext.Prelude
152 import Gargantext.Core.Types (TODO)
153 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
154 import Servant hiding (Patch)
155 import System.Clock (getTime, TimeSpec, Clock(..))
156 import System.FileLock (FileLock)
157 import System.IO (stderr)
158 import Test.QuickCheck (elements)
159 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
161 ------------------------------------------------------------------------
162 --data FacetFormat = Table | Chart
163 data TabType = Docs | Trash | MoreFav | MoreTrash
164 | Terms | Sources | Authors | Institutes
166 deriving (Generic, Enum, Bounded, Show)
168 instance FromHttpApiData TabType
170 parseUrlPiece "Docs" = pure Docs
171 parseUrlPiece "Trash" = pure Trash
172 parseUrlPiece "MoreFav" = pure MoreFav
173 parseUrlPiece "MoreTrash" = pure MoreTrash
175 parseUrlPiece "Terms" = pure Terms
176 parseUrlPiece "Sources" = pure Sources
177 parseUrlPiece "Institutes" = pure Institutes
178 parseUrlPiece "Authors" = pure Authors
180 parseUrlPiece "Contacts" = pure Contacts
182 parseUrlPiece _ = Left "Unexpected value of TabType"
184 instance ToParamSchema TabType
185 instance ToJSON TabType
186 instance FromJSON TabType
187 instance ToSchema TabType
188 instance Arbitrary TabType
190 arbitrary = elements [minBound .. maxBound]
192 newtype MSet a = MSet (Map a ())
193 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
195 instance ToJSON a => ToJSON (MSet a) where
196 toJSON (MSet m) = toJSON (Map.keys m)
197 toEncoding (MSet m) = toEncoding (Map.keys m)
199 mSetFromSet :: Set a -> MSet a
200 mSetFromSet = MSet . Map.fromSet (const ())
202 mSetFromList :: Ord a => [a] -> MSet a
203 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
205 -- mSetToSet :: Ord a => MSet a -> Set a
206 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
207 mSetToSet :: Ord a => MSet a -> Set a
208 mSetToSet = Set.fromList . mSetToList
210 mSetToList :: MSet a -> [a]
211 mSetToList (MSet a) = Map.keys a
213 instance Foldable MSet where
214 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
216 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
217 parseJSON = fmap mSetFromList . parseJSON
219 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
221 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
223 ------------------------------------------------------------------------
224 type NgramsTerm = Text
226 data RootParent = RootParent
227 { _rp_root :: NgramsTerm
228 , _rp_parent :: NgramsTerm
230 deriving (Ord, Eq, Show, Generic)
232 deriveJSON (unPrefix "_rp_") ''RootParent
233 makeLenses ''RootParent
235 data NgramsRepoElement = NgramsRepoElement
237 , _nre_list :: ListType
238 --, _nre_root_parent :: Maybe RootParent
239 , _nre_root :: Maybe NgramsTerm
240 , _nre_parent :: Maybe NgramsTerm
241 , _nre_children :: MSet NgramsTerm
243 deriving (Ord, Eq, Show, Generic)
245 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
246 makeLenses ''NgramsRepoElement
248 instance ToSchema NgramsRepoElement where
249 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
253 NgramsElement { _ne_ngrams :: NgramsTerm
255 , _ne_list :: ListType
256 , _ne_occurrences :: Int
257 , _ne_root :: Maybe NgramsTerm
258 , _ne_parent :: Maybe NgramsTerm
259 , _ne_children :: MSet NgramsTerm
261 deriving (Ord, Eq, Show, Generic)
263 deriveJSON (unPrefix "_ne_") ''NgramsElement
264 makeLenses ''NgramsElement
266 mkNgramsElement :: NgramsTerm
271 mkNgramsElement ngrams list rp children =
272 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
275 size = 1 + count " " ngrams
277 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
278 newNgramsElement mayList ngrams =
279 mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
281 instance ToSchema NgramsElement where
282 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
283 instance Arbitrary NgramsElement where
284 arbitrary = elements [newNgramsElement Nothing "sport"]
286 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
288 (NgramsElement { _ne_size = s
302 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
303 ngramsElementFromRepo
312 NgramsElement { _ne_size = s
317 , _ne_ngrams = ngrams
318 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
320 -- Here we could use 0 if we want to avoid any `panic`.
321 -- It will not happen using getTableNgrams if
322 -- getOccByNgramsOnly provides a count of occurrences for
323 -- all the ngrams given.
327 ------------------------------------------------------------------------
328 newtype NgramsTable = NgramsTable [NgramsElement]
329 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
331 type NgramsList = NgramsTable
333 makePrisms ''NgramsTable
335 -- | Question: why these repetition of Type in this instance
336 -- may you document it please ?
337 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
338 each = _NgramsTable . each
341 -- | TODO Check N and Weight
343 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
344 toNgramsElement ns = map toNgramsElement' ns
346 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
350 Just x -> lookup x mapParent
351 c' = maybe mempty identity $ lookup t mapChildren
352 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
354 mapParent :: Map Int Text
355 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
357 mapChildren :: Map Text (Set Text)
358 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
359 $ Map.fromListWith (<>)
360 $ map (first fromJust)
361 $ filter (isJust . fst)
362 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
365 mockTable :: NgramsTable
366 mockTable = NgramsTable
367 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
368 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
369 , mkNgramsElement "cats" StopTerm Nothing mempty
370 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
371 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
372 , mkNgramsElement "fox" GraphTerm Nothing mempty
373 , mkNgramsElement "object" CandidateTerm Nothing mempty
374 , mkNgramsElement "nothing" StopTerm Nothing mempty
375 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
376 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
377 , mkNgramsElement "moon" CandidateTerm Nothing mempty
378 , mkNgramsElement "sky" StopTerm Nothing mempty
381 rp n = Just $ RootParent n n
383 instance Arbitrary NgramsTable where
384 arbitrary = pure mockTable
386 instance ToSchema NgramsTable
388 ------------------------------------------------------------------------
389 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
390 ------------------------------------------------------------------------
391 -- On the Client side:
392 --data Action = InGroup NgramsId NgramsId
393 -- | OutGroup NgramsId NgramsId
394 -- | SetListType NgramsId ListType
396 data PatchSet a = PatchSet
400 deriving (Eq, Ord, Show, Generic)
402 makeLenses ''PatchSet
403 makePrisms ''PatchSet
405 instance ToJSON a => ToJSON (PatchSet a) where
406 toJSON = genericToJSON $ unPrefix "_"
407 toEncoding = genericToEncoding $ unPrefix "_"
409 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
410 parseJSON = genericParseJSON $ unPrefix "_"
413 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
414 arbitrary = PatchSet <$> arbitrary <*> arbitrary
416 type instance Patched (PatchSet a) = Set a
418 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
419 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
421 instance Ord a => Semigroup (PatchSet a) where
422 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
423 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
426 instance Ord a => Monoid (PatchSet a) where
427 mempty = PatchSet mempty mempty
429 instance Ord a => Group (PatchSet a) where
430 invert (PatchSet r a) = PatchSet a r
432 instance Ord a => Composable (PatchSet a) where
433 composable _ _ = undefined
435 instance Ord a => Action (PatchSet a) (Set a) where
436 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
438 instance Applicable (PatchSet a) (Set a) where
439 applicable _ _ = mempty
441 instance Ord a => Validity (PatchSet a) where
442 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
444 instance Ord a => Transformable (PatchSet a) where
445 transformable = undefined
447 conflicts _p _q = undefined
449 transformWith conflict p q = undefined conflict p q
451 instance ToSchema a => ToSchema (PatchSet a)
454 type AddRem = Replace (Maybe ())
456 remPatch, addPatch :: AddRem
457 remPatch = replace (Just ()) Nothing
458 addPatch = replace Nothing (Just ())
460 isRem :: Replace (Maybe ()) -> Bool
461 isRem = (== remPatch)
463 type PatchMap = PM.PatchMap
465 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
466 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
467 Transformable, Composable)
469 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
470 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
472 -- TODO this breaks module abstraction
473 makePrisms ''PM.PatchMap
475 makePrisms ''PatchMSet
477 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
478 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
480 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
481 f = Map.partition isRem >>> both %~ Map.keysSet
483 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
484 g (rems, adds) = Map.fromSet (const remPatch) rems
485 <> Map.fromSet (const addPatch) adds
487 instance Ord a => Action (PatchMSet a) (MSet a) where
488 act (PatchMSet p) (MSet m) = MSet $ act p m
490 instance Ord a => Applicable (PatchMSet a) (MSet a) where
491 applicable (PatchMSet p) (MSet m) = applicable p m
493 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
494 toJSON = toJSON . view _PatchMSetIso
495 toEncoding = toEncoding . view _PatchMSetIso
497 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
498 parseJSON = fmap (_PatchMSetIso #) . parseJSON
500 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
501 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
503 instance ToSchema a => ToSchema (PatchMSet a) where
505 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
507 type instance Patched (PatchMSet a) = MSet a
509 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
510 arbitrary = uncurry replace <$> arbitrary
511 -- If they happen to be equal then the patch is Keep.
513 instance ToSchema a => ToSchema (Replace a) where
514 declareNamedSchema (_ :: Proxy (Replace a)) = do
515 -- TODO Keep constructor is not supported here.
516 aSchema <- declareSchemaRef (Proxy :: Proxy a)
517 return $ NamedSchema (Just "Replace") $ mempty
518 & type_ ?~ SwaggerObject
520 InsOrdHashMap.fromList
524 & required .~ [ "old", "new" ]
527 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
528 , _patch_list :: Replace ListType -- TODO Map UserId ListType
530 deriving (Eq, Show, Generic)
532 deriveJSON (unPrefix "_") ''NgramsPatch
533 makeLenses ''NgramsPatch
535 instance ToSchema NgramsPatch where
536 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
538 instance Arbitrary NgramsPatch where
539 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
541 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
543 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
544 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
546 instance Semigroup NgramsPatch where
547 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
549 instance Monoid NgramsPatch where
550 mempty = _NgramsPatch # mempty
552 instance Validity NgramsPatch where
553 validate p = p ^. _NgramsPatch . to validate
555 instance Transformable NgramsPatch where
556 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
558 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
560 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
562 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
564 type ConflictResolutionNgramsPatch =
565 ( ConflictResolutionPatchMSet NgramsTerm
566 , ConflictResolutionReplace ListType
568 type instance ConflictResolution NgramsPatch =
569 ConflictResolutionNgramsPatch
571 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
572 -- ~ Patched NgramsPatchIso
573 type instance Patched NgramsPatch = PatchedNgramsPatch
575 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
576 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
577 applicable p (Just nre) =
578 applicable (p ^. patch_children) (nre ^. nre_children) <>
579 applicable (p ^. patch_list) (nre ^. nre_list)
581 instance Action NgramsPatch NgramsRepoElement where
582 act p = (nre_children %~ act (p ^. patch_children))
583 . (nre_list %~ act (p ^. patch_list))
585 instance Action NgramsPatch (Maybe NgramsRepoElement) where
588 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
589 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
591 instance FromField NgramsTablePatch
593 fromField = fromField'
595 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
597 fromField = fromField'
599 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
601 type instance ConflictResolution NgramsTablePatch =
602 NgramsTerm -> ConflictResolutionNgramsPatch
604 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
605 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
606 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
608 makePrisms ''NgramsTablePatch
609 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
610 instance ToSchema NgramsTablePatch
612 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
613 applicable p = applicable (p ^. _NgramsTablePatch)
615 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
617 fmap (execState (reParentNgramsTablePatch p)) .
618 act (p ^. _NgramsTablePatch)
620 instance Arbitrary NgramsTablePatch where
621 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
623 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
624 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
625 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
627 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
629 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
630 reRootChildren root ngram = do
631 nre <- use $ at ngram
632 forOf_ (_Just . nre_children . folded) nre $ \child -> do
633 at child . _Just . nre_root ?= root
634 reRootChildren root child
636 reParent :: Maybe RootParent -> ReParent NgramsTerm
637 reParent rp child = do
638 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
639 . (nre_root .~ (_rp_root <$> rp))
641 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
643 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
644 reParentAddRem rp child p =
645 reParent (if isRem p then Nothing else Just rp) child
647 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
648 reParentNgramsPatch parent ngramsPatch = do
649 root_of_parent <- use (at parent . _Just . nre_root)
651 root = fromMaybe parent root_of_parent
652 rp = RootParent { _rp_root = root, _rp_parent = parent }
653 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
654 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
656 reParentNgramsTablePatch :: ReParent NgramsTablePatch
657 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
658 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
660 ------------------------------------------------------------------------
661 ------------------------------------------------------------------------
664 data Versioned a = Versioned
665 { _v_version :: Version
668 deriving (Generic, Show, Eq)
669 deriveJSON (unPrefix "_v_") ''Versioned
670 makeLenses ''Versioned
671 instance ToSchema a => ToSchema (Versioned a) where
672 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
673 instance Arbitrary a => Arbitrary (Versioned a) where
674 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
678 -- TODO sequences of modifications (Patchs)
679 type NgramsIdPatch = Patch NgramsId NgramsPatch
681 ngramsPatch :: Int -> NgramsPatch
682 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
684 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
685 toEdit n p = Edit n p
686 ngramsIdPatch :: Patch NgramsId NgramsPatch
687 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
688 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
689 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
692 -- applyPatchBack :: Patch -> IO Patch
693 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
695 ------------------------------------------------------------------------
696 ------------------------------------------------------------------------
697 ------------------------------------------------------------------------
700 -- TODO: Replace.old is ignored which means that if the current list
701 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
702 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
703 -- However this should not happen in non conflicting situations.
704 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
705 mkListsUpdate nt patches =
706 [ (ngramsTypeId nt, ng, listTypeId lt)
707 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
708 , lt <- patch ^.. patch_list . new
711 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
714 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
715 mkChildrenGroups addOrRem nt patches =
716 [ (ngramsTypeId nt, parent, child)
717 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
718 , child <- patch ^.. patch_children . to addOrRem . folded
722 ngramsTypeFromTabType :: TabType -> NgramsType
723 ngramsTypeFromTabType tabType =
724 let lieu = "Garg.API.Ngrams: " :: Text in
726 Sources -> Ngrams.Sources
727 Authors -> Ngrams.Authors
728 Institutes -> Ngrams.Institutes
729 Terms -> Ngrams.NgramsTerms
730 _ -> panic $ lieu <> "No Ngrams for this tab"
731 -- TODO: This `panic` would disapear with custom NgramsType.
733 ------------------------------------------------------------------------
735 { _r_version :: Version
738 -- first patch in the list is the most recent
742 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
743 parseJSON = genericParseJSON $ unPrefix "_r_"
745 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
746 toJSON = genericToJSON $ unPrefix "_r_"
747 toEncoding = genericToEncoding $ unPrefix "_r_"
751 initRepo :: Monoid s => Repo s p
752 initRepo = Repo 1 mempty []
754 type NgramsRepo = Repo NgramsState NgramsStatePatch
755 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
756 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
758 initMockRepo :: NgramsRepo
759 initMockRepo = Repo 1 s []
761 s = Map.singleton Ngrams.NgramsTerms
762 $ Map.singleton 47254
764 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
766 data RepoEnv = RepoEnv
767 { _renv_var :: !(MVar NgramsRepo)
768 , _renv_saver :: !(IO ())
769 , _renv_lock :: !FileLock
775 class HasRepoVar env where
776 repoVar :: Getter env (MVar NgramsRepo)
778 instance HasRepoVar (MVar NgramsRepo) where
781 class HasRepoSaver env where
782 repoSaver :: Getter env (IO ())
784 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
785 repoEnv :: Getter env RepoEnv
787 instance HasRepo RepoEnv where
790 instance HasRepoVar RepoEnv where
793 instance HasRepoSaver RepoEnv where
794 repoSaver = renv_saver
796 type RepoCmdM env err m =
802 ------------------------------------------------------------------------
804 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
806 saveRepo = liftIO =<< view repoSaver
808 listTypeConflictResolution :: ListType -> ListType -> ListType
809 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
811 ngramsStatePatchConflictResolution
812 :: NgramsType -> NodeId -> NgramsTerm
813 -> ConflictResolutionNgramsPatch
814 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
816 -- undefined {- TODO think this through -}, listTypeConflictResolution)
819 -- Insertions are not considered as patches,
820 -- they do not extend history,
821 -- they do not bump version.
822 insertNewOnly :: a -> Maybe b -> a
823 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
824 -- TODO error handling
826 something :: Monoid a => Maybe a -> a
827 something Nothing = mempty
828 something (Just a) = a
831 -- TODO refactor with putListNgrams
832 copyListNgrams :: RepoCmdM env err m
833 => NodeId -> NodeId -> NgramsType
835 copyListNgrams srcListId dstListId ngramsType = do
837 liftIO $ modifyMVar_ var $
838 pure . (r_state . at ngramsType %~ (Just . f . something))
841 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
842 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
844 -- TODO refactor with putListNgrams
845 -- The list must be non-empty!
846 -- The added ngrams must be non-existent!
847 addListNgrams :: RepoCmdM env err m
848 => NodeId -> NgramsType
849 -> [NgramsElement] -> m ()
850 addListNgrams listId ngramsType nes = do
852 liftIO $ modifyMVar_ var $
853 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
856 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
859 rmListNgrams :: RepoCmdM env err m
863 rmListNgrams l nt = setListNgrams l nt mempty
865 -- | TODO: incr the Version number
866 -- && should use patch
867 setListNgrams :: RepoCmdM env err m
870 -> Map NgramsTerm NgramsRepoElement
872 setListNgrams listId ngramsType ns = do
874 liftIO $ modifyMVar_ var $
878 (at listId .~ ( Just ns))
885 -- If the given list of ngrams elements contains ngrams already in
886 -- the repo, they will be ignored.
887 putListNgrams :: RepoCmdM env err m
888 => NodeId -> NgramsType
889 -> [NgramsElement] -> m ()
890 putListNgrams _ _ [] = pure ()
891 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
893 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
895 putListNgrams' :: RepoCmdM env err m
896 => ListId -> NgramsType
897 -> Map NgramsTerm NgramsRepoElement
899 putListNgrams' listId ngramsType ns = do
900 -- printDebug "putListNgrams" (length nes)
902 liftIO $ modifyMVar_ var $
919 tableNgramsPost :: RepoCmdM env err m
923 -> [NgramsTerm] -> m ()
924 tableNgramsPost tabType listId mayList =
925 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
927 currentVersion :: RepoCmdM env err m
931 r <- liftIO $ readMVar var
932 pure $ r ^. r_version
934 tableNgramsPull :: RepoCmdM env err m
935 => ListId -> NgramsType
937 -> m (Versioned NgramsTablePatch)
938 tableNgramsPull listId ngramsType p_version = do
940 r <- liftIO $ readMVar var
943 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
944 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
946 pure (Versioned (r ^. r_version) q_table)
948 -- Apply the given patch to the DB and returns the patch to be applied on the
951 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
953 -> Versioned NgramsTablePatch
954 -> m (Versioned NgramsTablePatch)
955 tableNgramsPut tabType listId (Versioned p_version p_table)
956 | p_table == mempty = do
957 let ngramsType = ngramsTypeFromTabType tabType
958 tableNgramsPull listId ngramsType p_version
961 let ngramsType = ngramsTypeFromTabType tabType
962 (p0, p0_validity) = PM.singleton listId p_table
963 (p, p_validity) = PM.singleton ngramsType p0
965 assertValid p0_validity
966 assertValid p_validity
969 vq' <- liftIO $ modifyMVar var $ \r -> do
971 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
972 (p', q') = transformWith ngramsStatePatchConflictResolution p q
973 r' = r & r_version +~ 1
975 & r_history %~ (p' :)
976 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
978 -- Ideally we would like to check these properties. However:
979 -- * They should be checked only to debug the code. The client data
980 -- should be able to trigger these.
981 -- * What kind of error should they throw (we are in IO here)?
982 -- * Should we keep modifyMVar?
983 -- * Should we throw the validation in an Exception, catch it around
984 -- modifyMVar and throw it back as an Error?
985 assertValid $ transformable p q
986 assertValid $ applicable p' (r ^. r_state)
988 pure (r', Versioned (r' ^. r_version) q'_table)
993 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
994 mergeNgramsElement _neOld neNew = neNew
996 { _ne_list :: ListType
997 If we merge the parents/children we can potentially create cycles!
998 , _ne_parent :: Maybe NgramsTerm
999 , _ne_children :: MSet NgramsTerm
1003 getNgramsTableMap :: RepoCmdM env err m
1006 -> m (Versioned NgramsTableMap)
1007 getNgramsTableMap nodeId ngramsType = do
1009 repo <- liftIO $ readMVar v
1010 pure $ Versioned (repo ^. r_version)
1011 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
1016 -- | TODO Errors management
1017 -- TODO: polymorphic for Annuaire or Corpus or ...
1018 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
1019 -- TODO: should take only one ListId
1021 getTime' :: MonadIO m => m TimeSpec
1022 getTime' = liftIO $ getTime ProcessCPUTime
1025 getTableNgrams :: forall env err m.
1026 (RepoCmdM env err m, HasNodeError err, HasConnection env)
1027 => NodeType -> NodeId -> TabType
1028 -> ListId -> Limit -> Maybe Offset
1030 -> Maybe MinSize -> Maybe MaxSize
1032 -> (NgramsTerm -> Bool)
1033 -> m (Versioned NgramsTable)
1034 getTableNgrams _nType nId tabType listId limit_ offset
1035 listType minSize maxSize orderBy searchQuery = do
1038 -- lIds <- selectNodesWithUsername NodeList userMaster
1040 ngramsType = ngramsTypeFromTabType tabType
1041 offset' = maybe 0 identity offset
1042 listType' = maybe (const True) (==) listType
1043 minSize' = maybe (const True) (<=) minSize
1044 maxSize' = maybe (const True) (>=) maxSize
1046 selected_node n = minSize' s
1048 && searchQuery (n ^. ne_ngrams)
1049 && listType' (n ^. ne_list)
1053 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1055 ---------------------------------------
1056 sortOnOrder Nothing = identity
1057 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1058 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1059 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1060 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1062 ---------------------------------------
1063 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1064 selectAndPaginate tableMap = roots <> inners
1066 list = tableMap ^.. each
1067 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1069 selected_nodes = list & take limit_
1071 . filter selected_node
1072 . sortOnOrder orderBy
1073 roots = rootOf <$> selected_nodes
1074 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1075 inners = list & filter (selected_inner rootsSet)
1077 ---------------------------------------
1078 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1079 setScores False table = pure table
1080 setScores True table = do
1081 let ngrams_terms = (table ^.. each . ne_ngrams)
1083 occurrences <- getOccByNgramsOnlyFast' nId
1088 liftIO $ hprint stderr
1089 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1090 (length ngrams_terms) t1 t2
1092 occurrences <- getOccByNgramsOnlySlow nType nId
1098 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1100 pure $ table & each %~ setOcc
1101 ---------------------------------------
1103 -- lists <- catMaybes <$> listsWith userMaster
1104 -- trace (show lists) $
1105 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1107 let scoresNeeded = needsScores orderBy
1108 tableMap1 <- getNgramsTableMap listId ngramsType
1110 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1111 . Map.mapWithKey ngramsElementFromRepo
1113 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1114 . setScores (not scoresNeeded)
1117 liftIO $ hprint stderr
1118 ("getTableNgrams total=" % timeSpecs
1119 % " map1=" % timeSpecs
1120 % " map2=" % timeSpecs
1121 % " map3=" % timeSpecs
1122 % " sql=" % (if scoresNeeded then "map2" else "map3")
1124 ) t0 t3 t0 t1 t1 t2 t2 t3
1130 -- TODO: find a better place for the code above, All APIs stay here
1131 type QueryParamR = QueryParam' '[Required, Strict]
1133 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1134 deriving (Generic, Enum, Bounded, Read, Show)
1136 instance FromHttpApiData OrderBy
1138 parseUrlPiece "TermAsc" = pure TermAsc
1139 parseUrlPiece "TermDesc" = pure TermDesc
1140 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1141 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1142 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1145 instance ToParamSchema OrderBy
1146 instance FromJSON OrderBy
1147 instance ToJSON OrderBy
1148 instance ToSchema OrderBy
1149 instance Arbitrary OrderBy
1151 arbitrary = elements [minBound..maxBound]
1153 needsScores :: Maybe OrderBy -> Bool
1154 needsScores (Just ScoreAsc) = True
1155 needsScores (Just ScoreDesc) = True
1156 needsScores _ = False
1158 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1159 :> QueryParamR "ngramsType" TabType
1160 :> QueryParamR "list" ListId
1161 :> QueryParamR "limit" Limit
1162 :> QueryParam "offset" Offset
1163 :> QueryParam "listType" ListType
1164 :> QueryParam "minTermSize" MinSize
1165 :> QueryParam "maxTermSize" MaxSize
1166 :> QueryParam "orderBy" OrderBy
1167 :> QueryParam "search" Text
1168 :> Get '[JSON] (Versioned NgramsTable)
1170 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1171 :> QueryParamR "ngramsType" TabType
1172 :> QueryParamR "list" ListId
1173 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1174 :> Put '[JSON] (Versioned NgramsTablePatch)
1176 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1177 :> QueryParamR "ngramsType" TabType
1178 :> QueryParamR "list" ListId
1179 :> QueryParam "listType" ListType
1180 :> ReqBody '[JSON] [NgramsTerm]
1183 type TableNgramsApi = TableNgramsApiGet
1184 :<|> TableNgramsApiPut
1185 :<|> TableNgramsApiPost
1187 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1188 => NodeId -> TabType
1189 -> ListId -> Limit -> Maybe Offset
1191 -> Maybe MinSize -> Maybe MaxSize
1193 -> Maybe Text -- full text search
1194 -> m (Versioned NgramsTable)
1195 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1196 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1198 searchQuery = maybe (const True) isInfixOf mt
1200 -- | Text search is deactivated for now for ngrams by doc only
1201 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1203 -> ListId -> Limit -> Maybe Offset
1205 -> Maybe MinSize -> Maybe MaxSize
1207 -> Maybe Text -- full text search
1208 -> m (Versioned NgramsTable)
1209 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1210 ns <- selectNodesWithUsername NodeList userMaster
1211 let ngramsType = ngramsTypeFromTabType tabType
1212 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1213 let searchQuery = flip S.member (S.fromList ngs)
1214 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1218 apiNgramsTableCorpus :: ( RepoCmdM env err m
1220 , HasInvalidError err
1223 => NodeId -> ServerT TableNgramsApi m
1224 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1226 :<|> tableNgramsPost
1229 apiNgramsTableDoc :: ( RepoCmdM env err m
1231 , HasInvalidError err
1234 => DocId -> ServerT TableNgramsApi m
1235 apiNgramsTableDoc dId = getTableNgramsDoc dId
1237 :<|> tableNgramsPost
1238 -- > add new ngrams in database (TODO AD)
1239 -- > index all the corpus accordingly (TODO AD)
1241 listNgramsChangedSince :: RepoCmdM env err m
1242 => ListId -> NgramsType -> Version -> m (Versioned Bool)
1243 listNgramsChangedSince listId ngramsType version
1245 Versioned <$> currentVersion <*> pure True
1247 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1250 instance Arbitrary NgramsRepoElement where
1251 arbitrary = elements $ map ngramsElementToRepo ns
1253 NgramsTable ns = mockTable
1256 instance FromHttpApiData (Map NgramsType (Versioned NgramsTableMap))
1258 parseUrlPiece x = maybeToEither x (decode $ cs x)