1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
44 , apiNgramsTableCorpus
64 , NgramsRepoElement(..)
73 , ngramsTypeFromTabType
89 , listNgramsChangedSince
93 -- import Debug.Trace (trace)
94 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
95 -- import Gargantext.Database.Schema.User (UserId)
96 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
97 Composable(..), Transformable(..),
98 PairPatch(..), Patched, ConflictResolution,
99 ConflictResolutionReplace, ours)
100 import qualified Data.Map.Strict.Patch as PM
102 import Data.Ord (Down(..))
104 --import Data.Semigroup
105 import Data.Set (Set)
106 import qualified Data.Set as S
107 import qualified Data.List as List
108 import Data.Maybe (fromMaybe)
109 -- import Data.Tuple.Extra (first)
110 import qualified Data.Map.Strict as Map
111 import Data.Map.Strict (Map)
112 import qualified Data.Set as Set
113 import Control.Category ((>>>))
114 import Control.Concurrent
115 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
116 import Control.Monad.Error.Class (MonadError)
117 import Control.Monad.Reader
118 import Control.Monad.State
119 import Data.Aeson hiding ((.=))
120 import Data.Aeson.TH (deriveJSON)
121 import Data.Either(Either(Left))
122 -- import Data.Map (lookup)
123 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
124 import Data.Swagger hiding (version, patch)
125 import Data.Text (Text, isInfixOf, count)
127 import Formatting (hprint, int, (%))
128 import Formatting.Clock (timeSpecs)
129 import GHC.Generics (Generic)
130 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
131 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
132 import Gargantext.Database.Config (userMaster)
133 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
134 import Gargantext.Database.Schema.Ngrams (NgramsType)
135 import Gargantext.Database.Types.Node (NodeType(..))
136 import Gargantext.Database.Utils (fromField', HasConnection)
137 import Gargantext.Database.Node.Select
138 import Gargantext.Database.Ngrams
139 --import Gargantext.Database.Lists (listsWith)
140 import Gargantext.Database.Schema.Node (HasNodeError)
141 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
142 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
143 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
144 import Gargantext.Prelude
145 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
146 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
147 import Servant hiding (Patch)
148 import System.Clock (getTime, TimeSpec, Clock(..))
149 import System.FileLock (FileLock)
150 import System.IO (stderr)
151 import Test.QuickCheck (elements)
152 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
157 instance ToSchema TODO where
158 instance ToParamSchema TODO where
160 ------------------------------------------------------------------------
161 --data FacetFormat = Table | Chart
162 data TabType = Docs | Trash | MoreFav | MoreTrash
163 | Terms | Sources | Authors | Institutes
165 deriving (Generic, Enum, Bounded, Show)
167 instance FromHttpApiData TabType
169 parseUrlPiece "Docs" = pure Docs
170 parseUrlPiece "Trash" = pure Trash
171 parseUrlPiece "MoreFav" = pure MoreFav
172 parseUrlPiece "MoreTrash" = pure MoreTrash
174 parseUrlPiece "Terms" = pure Terms
175 parseUrlPiece "Sources" = pure Sources
176 parseUrlPiece "Institutes" = pure Institutes
177 parseUrlPiece "Authors" = pure Authors
179 parseUrlPiece "Contacts" = pure Contacts
181 parseUrlPiece _ = Left "Unexpected value of TabType"
183 instance ToParamSchema TabType
184 instance ToJSON TabType
185 instance FromJSON TabType
186 instance ToSchema TabType
187 instance Arbitrary TabType
189 arbitrary = elements [minBound .. maxBound]
191 newtype MSet a = MSet (Map a ())
192 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
194 instance ToJSON a => ToJSON (MSet a) where
195 toJSON (MSet m) = toJSON (Map.keys m)
196 toEncoding (MSet m) = toEncoding (Map.keys m)
198 mSetFromSet :: Set a -> MSet a
199 mSetFromSet = MSet . Map.fromSet (const ())
201 mSetFromList :: Ord a => [a] -> MSet a
202 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
204 -- mSetToSet :: Ord a => MSet a -> Set a
205 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
206 mSetToSet :: Ord a => MSet a -> Set a
207 mSetToSet = Set.fromList . mSetToList
209 mSetToList :: MSet a -> [a]
210 mSetToList (MSet a) = Map.keys a
212 instance Foldable MSet where
213 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
215 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
216 parseJSON = fmap mSetFromList . parseJSON
218 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
220 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
222 ------------------------------------------------------------------------
223 type NgramsTerm = Text
225 data RootParent = RootParent
226 { _rp_root :: NgramsTerm
227 , _rp_parent :: NgramsTerm
229 deriving (Ord, Eq, Show, Generic)
231 deriveJSON (unPrefix "_rp_") ''RootParent
232 makeLenses ''RootParent
234 data NgramsRepoElement = NgramsRepoElement
236 , _nre_list :: ListType
237 --, _nre_root_parent :: Maybe RootParent
238 , _nre_root :: Maybe NgramsTerm
239 , _nre_parent :: Maybe NgramsTerm
240 , _nre_children :: MSet NgramsTerm
242 deriving (Ord, Eq, Show, Generic)
244 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
245 makeLenses ''NgramsRepoElement
248 NgramsElement { _ne_ngrams :: NgramsTerm
250 , _ne_list :: ListType
251 , _ne_occurrences :: Int
252 , _ne_root :: Maybe NgramsTerm
253 , _ne_parent :: Maybe NgramsTerm
254 , _ne_children :: MSet NgramsTerm
256 deriving (Ord, Eq, Show, Generic)
258 deriveJSON (unPrefix "_ne_") ''NgramsElement
259 makeLenses ''NgramsElement
261 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
262 mkNgramsElement ngrams list rp children =
263 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
266 size = 1 + count " " ngrams
268 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
269 newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
271 instance ToSchema NgramsElement where
272 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
273 instance Arbitrary NgramsElement where
274 arbitrary = elements [newNgramsElement Nothing "sport"]
276 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
278 (NgramsElement { _ne_size = s
292 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
293 ngramsElementFromRepo
302 NgramsElement { _ne_size = s
307 , _ne_ngrams = ngrams
308 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
310 -- Here we could use 0 if we want to avoid any `panic`.
311 -- It will not happen using getTableNgrams if
312 -- getOccByNgramsOnly provides a count of occurrences for
313 -- all the ngrams given.
317 ------------------------------------------------------------------------
318 newtype NgramsTable = NgramsTable [NgramsElement]
319 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
321 type ListNgrams = NgramsTable
323 makePrisms ''NgramsTable
325 -- | Question: why these repetition of Type in this instance
326 -- may you document it please ?
327 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
328 each = _NgramsTable . each
331 -- | TODO Check N and Weight
333 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
334 toNgramsElement ns = map toNgramsElement' ns
336 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
340 Just x -> lookup x mapParent
341 c' = maybe mempty identity $ lookup t mapChildren
342 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
344 mapParent :: Map Int Text
345 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
347 mapChildren :: Map Text (Set Text)
348 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
349 $ Map.fromListWith (<>)
350 $ map (first fromJust)
351 $ filter (isJust . fst)
352 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
355 mockTable :: NgramsTable
356 mockTable = NgramsTable
357 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
358 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
359 , mkNgramsElement "cats" StopTerm Nothing mempty
360 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
361 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
362 , mkNgramsElement "fox" GraphTerm Nothing mempty
363 , mkNgramsElement "object" CandidateTerm Nothing mempty
364 , mkNgramsElement "nothing" StopTerm Nothing mempty
365 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
366 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
367 , mkNgramsElement "moon" CandidateTerm Nothing mempty
368 , mkNgramsElement "sky" StopTerm Nothing mempty
371 rp n = Just $ RootParent n n
373 instance Arbitrary NgramsTable where
374 arbitrary = pure mockTable
376 instance ToSchema NgramsTable
378 ------------------------------------------------------------------------
379 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
381 ------------------------------------------------------------------------
382 -- On the Client side:
383 --data Action = InGroup NgramsId NgramsId
384 -- | OutGroup NgramsId NgramsId
385 -- | SetListType NgramsId ListType
387 data PatchSet a = PatchSet
391 deriving (Eq, Ord, Show, Generic)
393 makeLenses ''PatchSet
394 makePrisms ''PatchSet
396 instance ToJSON a => ToJSON (PatchSet a) where
397 toJSON = genericToJSON $ unPrefix "_"
398 toEncoding = genericToEncoding $ unPrefix "_"
400 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
401 parseJSON = genericParseJSON $ unPrefix "_"
404 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
405 arbitrary = PatchSet <$> arbitrary <*> arbitrary
407 type instance Patched (PatchSet a) = Set a
409 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
410 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
412 instance Ord a => Semigroup (PatchSet a) where
413 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
414 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
417 instance Ord a => Monoid (PatchSet a) where
418 mempty = PatchSet mempty mempty
420 instance Ord a => Group (PatchSet a) where
421 invert (PatchSet r a) = PatchSet a r
423 instance Ord a => Composable (PatchSet a) where
424 composable _ _ = undefined
426 instance Ord a => Action (PatchSet a) (Set a) where
427 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
429 instance Applicable (PatchSet a) (Set a) where
430 applicable _ _ = mempty
432 instance Ord a => Validity (PatchSet a) where
433 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
435 instance Ord a => Transformable (PatchSet a) where
436 transformable = undefined
438 conflicts _p _q = undefined
440 transformWith conflict p q = undefined conflict p q
442 instance ToSchema a => ToSchema (PatchSet a)
445 type AddRem = Replace (Maybe ())
447 remPatch, addPatch :: AddRem
448 remPatch = replace (Just ()) Nothing
449 addPatch = replace Nothing (Just ())
451 isRem :: Replace (Maybe ()) -> Bool
452 isRem = (== remPatch)
454 type PatchMap = PM.PatchMap
456 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
457 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
458 Transformable, Composable)
460 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
461 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
463 -- TODO this breaks module abstraction
464 makePrisms ''PM.PatchMap
466 makePrisms ''PatchMSet
468 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
469 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
471 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
472 f = Map.partition isRem >>> both %~ Map.keysSet
474 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
475 g (rems, adds) = Map.fromSet (const remPatch) rems
476 <> Map.fromSet (const addPatch) adds
478 instance Ord a => Action (PatchMSet a) (MSet a) where
479 act (PatchMSet p) (MSet m) = MSet $ act p m
481 instance Ord a => Applicable (PatchMSet a) (MSet a) where
482 applicable (PatchMSet p) (MSet m) = applicable p m
484 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
485 toJSON = toJSON . view _PatchMSetIso
486 toEncoding = toEncoding . view _PatchMSetIso
488 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
489 parseJSON = fmap (_PatchMSetIso #) . parseJSON
491 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
492 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
494 instance ToSchema a => ToSchema (PatchMSet a) where
496 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
498 type instance Patched (PatchMSet a) = MSet a
500 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
501 arbitrary = uncurry replace <$> arbitrary
502 -- If they happen to be equal then the patch is Keep.
504 instance ToSchema a => ToSchema (Replace a) where
505 declareNamedSchema (_ :: Proxy (Replace a)) = do
506 -- TODO Keep constructor is not supported here.
507 aSchema <- declareSchemaRef (Proxy :: Proxy a)
508 return $ NamedSchema (Just "Replace") $ mempty
509 & type_ ?~ SwaggerObject
511 InsOrdHashMap.fromList
515 & required .~ [ "old", "new" ]
518 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
519 , _patch_list :: Replace ListType -- TODO Map UserId ListType
521 deriving (Eq, Show, Generic)
523 deriveJSON (unPrefix "_") ''NgramsPatch
524 makeLenses ''NgramsPatch
526 instance ToSchema NgramsPatch where
527 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
529 instance Arbitrary NgramsPatch where
530 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
532 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
534 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
535 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
537 instance Semigroup NgramsPatch where
538 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
540 instance Monoid NgramsPatch where
541 mempty = _NgramsPatch # mempty
543 instance Validity NgramsPatch where
544 validate p = p ^. _NgramsPatch . to validate
546 instance Transformable NgramsPatch where
547 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
549 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
551 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
553 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
555 type ConflictResolutionNgramsPatch =
556 ( ConflictResolutionPatchMSet NgramsTerm
557 , ConflictResolutionReplace ListType
559 type instance ConflictResolution NgramsPatch =
560 ConflictResolutionNgramsPatch
562 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
563 -- ~ Patched NgramsPatchIso
564 type instance Patched NgramsPatch = PatchedNgramsPatch
566 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
567 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
568 applicable p (Just nre) =
569 applicable (p ^. patch_children) (nre ^. nre_children) <>
570 applicable (p ^. patch_list) (nre ^. nre_list)
572 instance Action NgramsPatch NgramsRepoElement where
573 act p = (nre_children %~ act (p ^. patch_children))
574 . (nre_list %~ act (p ^. patch_list))
576 instance Action NgramsPatch (Maybe NgramsRepoElement) where
579 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
580 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
582 instance FromField NgramsTablePatch
584 fromField = fromField'
586 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
588 fromField = fromField'
590 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
592 type instance ConflictResolution NgramsTablePatch =
593 NgramsTerm -> ConflictResolutionNgramsPatch
595 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
596 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
597 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
599 makePrisms ''NgramsTablePatch
600 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
601 instance ToSchema NgramsTablePatch
603 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
604 applicable p = applicable (p ^. _NgramsTablePatch)
606 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
608 fmap (execState (reParentNgramsTablePatch p)) .
609 act (p ^. _NgramsTablePatch)
611 instance Arbitrary NgramsTablePatch where
612 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
614 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
615 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
616 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
618 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
620 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
621 reRootChildren root ngram = do
622 nre <- use $ at ngram
623 forOf_ (_Just . nre_children . folded) nre $ \child -> do
624 at child . _Just . nre_root ?= root
625 reRootChildren root child
627 reParent :: Maybe RootParent -> ReParent NgramsTerm
628 reParent rp child = do
629 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
630 . (nre_root .~ (_rp_root <$> rp))
632 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
634 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
635 reParentAddRem rp child p =
636 reParent (if isRem p then Nothing else Just rp) child
638 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
639 reParentNgramsPatch parent ngramsPatch = do
640 root_of_parent <- use (at parent . _Just . nre_root)
642 root = fromMaybe parent root_of_parent
643 rp = RootParent { _rp_root = root, _rp_parent = parent }
644 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
645 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
647 reParentNgramsTablePatch :: ReParent NgramsTablePatch
648 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
649 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
651 ------------------------------------------------------------------------
652 ------------------------------------------------------------------------
655 data Versioned a = Versioned
656 { _v_version :: Version
659 deriving (Generic, Show)
660 deriveJSON (unPrefix "_v_") ''Versioned
661 makeLenses ''Versioned
662 instance ToSchema a => ToSchema (Versioned a) where
663 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
664 instance Arbitrary a => Arbitrary (Versioned a) where
665 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
668 -- TODO sequencs of modifications (Patchs)
669 type NgramsIdPatch = Patch NgramsId NgramsPatch
671 ngramsPatch :: Int -> NgramsPatch
672 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
674 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
675 toEdit n p = Edit n p
676 ngramsIdPatch :: Patch NgramsId NgramsPatch
677 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
678 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
679 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
682 -- applyPatchBack :: Patch -> IO Patch
683 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
685 ------------------------------------------------------------------------
686 ------------------------------------------------------------------------
687 ------------------------------------------------------------------------
690 -- TODO: Replace.old is ignored which means that if the current list
691 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
692 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
693 -- However this should not happen in non conflicting situations.
694 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
695 mkListsUpdate nt patches =
696 [ (ngramsTypeId nt, ng, listTypeId lt)
697 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
698 , lt <- patch ^.. patch_list . new
701 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
704 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
705 mkChildrenGroups addOrRem nt patches =
706 [ (ngramsTypeId nt, parent, child)
707 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
708 , child <- patch ^.. patch_children . to addOrRem . folded
712 ngramsTypeFromTabType :: TabType -> NgramsType
713 ngramsTypeFromTabType tabType =
714 let lieu = "Garg.API.Ngrams: " :: Text in
716 Sources -> Ngrams.Sources
717 Authors -> Ngrams.Authors
718 Institutes -> Ngrams.Institutes
719 Terms -> Ngrams.NgramsTerms
720 _ -> panic $ lieu <> "No Ngrams for this tab"
721 -- TODO: This `panic` would disapear with custom NgramsType.
723 ------------------------------------------------------------------------
725 { _r_version :: Version
728 -- first patch in the list is the most recent
732 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
733 parseJSON = genericParseJSON $ unPrefix "_r_"
735 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
736 toJSON = genericToJSON $ unPrefix "_r_"
737 toEncoding = genericToEncoding $ unPrefix "_r_"
741 initRepo :: Monoid s => Repo s p
742 initRepo = Repo 1 mempty []
744 type NgramsRepo = Repo NgramsState NgramsStatePatch
745 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
746 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
748 initMockRepo :: NgramsRepo
749 initMockRepo = Repo 1 s []
751 s = Map.singleton Ngrams.NgramsTerms
752 $ Map.singleton 47254
754 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
756 data RepoEnv = RepoEnv
757 { _renv_var :: !(MVar NgramsRepo)
758 , _renv_saver :: !(IO ())
759 , _renv_lock :: !FileLock
765 class HasRepoVar env where
766 repoVar :: Getter env (MVar NgramsRepo)
768 instance HasRepoVar (MVar NgramsRepo) where
771 class HasRepoSaver env where
772 repoSaver :: Getter env (IO ())
774 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
775 repoEnv :: Getter env RepoEnv
777 instance HasRepo RepoEnv where
780 instance HasRepoVar RepoEnv where
783 instance HasRepoSaver RepoEnv where
784 repoSaver = renv_saver
786 type RepoCmdM env err m =
792 ------------------------------------------------------------------------
794 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
796 saveRepo = liftIO =<< view repoSaver
798 listTypeConflictResolution :: ListType -> ListType -> ListType
799 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
801 ngramsStatePatchConflictResolution
802 :: NgramsType -> NodeId -> NgramsTerm
803 -> ConflictResolutionNgramsPatch
804 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
806 -- undefined {- TODO think this through -}, listTypeConflictResolution)
809 -- Insertions are not considered as patches,
810 -- they do not extend history,
811 -- they do not bump version.
812 insertNewOnly :: a -> Maybe b -> a
813 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
814 -- TODO error handling
816 something :: Monoid a => Maybe a -> a
817 something Nothing = mempty
818 something (Just a) = a
821 -- TODO refactor with putListNgrams
822 copyListNgrams :: RepoCmdM env err m
823 => NodeId -> NodeId -> NgramsType
825 copyListNgrams srcListId dstListId ngramsType = do
827 liftIO $ modifyMVar_ var $
828 pure . (r_state . at ngramsType %~ (Just . f . something))
831 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
832 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
834 -- TODO refactor with putListNgrams
835 -- The list must be non-empty!
836 -- The added ngrams must be non-existent!
837 addListNgrams :: RepoCmdM env err m
838 => NodeId -> NgramsType
839 -> [NgramsElement] -> m ()
840 addListNgrams listId ngramsType nes = do
842 liftIO $ modifyMVar_ var $
843 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
846 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
849 -- If the given list of ngrams elements contains ngrams already in
850 -- the repo, they will be ignored.
851 putListNgrams :: RepoCmdM env err m
852 => NodeId -> NgramsType
853 -> [NgramsElement] -> m ()
854 putListNgrams _ _ [] = pure ()
855 putListNgrams listId ngramsType nes = do
856 -- printDebug "putListNgrams" (length nes)
858 liftIO $ modifyMVar_ var $
859 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (<> m) . something)) . something))
862 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
865 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
866 tableNgramsPost tabType listId mayList =
867 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
869 currentVersion :: RepoCmdM env err m => m Version
872 r <- liftIO $ readMVar var
873 pure $ r ^. r_version
875 tableNgramsPull :: RepoCmdM env err m
876 => ListId -> NgramsType
878 -> m (Versioned NgramsTablePatch)
879 tableNgramsPull listId ngramsType p_version = do
881 r <- liftIO $ readMVar var
884 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
885 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
887 pure (Versioned (r ^. r_version) q_table)
889 -- Apply the given patch to the DB and returns the patch to be applied on the
892 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
894 -> Versioned NgramsTablePatch
895 -> m (Versioned NgramsTablePatch)
896 tableNgramsPut tabType listId (Versioned p_version p_table)
897 | p_table == mempty = do
898 let ngramsType = ngramsTypeFromTabType tabType
899 tableNgramsPull listId ngramsType p_version
902 let ngramsType = ngramsTypeFromTabType tabType
903 (p0, p0_validity) = PM.singleton listId p_table
904 (p, p_validity) = PM.singleton ngramsType p0
906 assertValid p0_validity
907 assertValid p_validity
910 vq' <- liftIO $ modifyMVar var $ \r -> do
912 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
913 (p', q') = transformWith ngramsStatePatchConflictResolution p q
914 r' = r & r_version +~ 1
916 & r_history %~ (p' :)
917 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
919 -- Ideally we would like to check these properties. However:
920 -- * They should be checked only to debug the code. The client data
921 -- should be able to trigger these.
922 -- * What kind of error should they throw (we are in IO here)?
923 -- * Should we keep modifyMVar?
924 -- * Should we throw the validation in an Exception, catch it around
925 -- modifyMVar and throw it back as an Error?
926 assertValid $ transformable p q
927 assertValid $ applicable p' (r ^. r_state)
929 pure (r', Versioned (r' ^. r_version) q'_table)
934 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
935 mergeNgramsElement _neOld neNew = neNew
937 { _ne_list :: ListType
938 If we merge the parents/children we can potentially create cycles!
939 , _ne_parent :: Maybe NgramsTerm
940 , _ne_children :: MSet NgramsTerm
944 getNgramsTableMap :: RepoCmdM env err m
945 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
946 getNgramsTableMap nodeId ngramsType = do
948 repo <- liftIO $ readMVar v
949 pure $ Versioned (repo ^. r_version)
950 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
955 -- | TODO Errors management
956 -- TODO: polymorphic for Annuaire or Corpus or ...
957 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
958 -- TODO: should take only one ListId
960 getTime' :: MonadIO m => m TimeSpec
961 getTime' = liftIO $ getTime ProcessCPUTime
964 getTableNgrams :: forall env err m.
965 (RepoCmdM env err m, HasNodeError err, HasConnection env)
966 => NodeType -> NodeId -> TabType
967 -> ListId -> Limit -> Maybe Offset
969 -> Maybe MinSize -> Maybe MaxSize
971 -> (NgramsTerm -> Bool)
972 -> m (Versioned NgramsTable)
973 getTableNgrams _nType nId tabType listId limit_ offset
974 listType minSize maxSize orderBy searchQuery = do
977 -- lIds <- selectNodesWithUsername NodeList userMaster
979 ngramsType = ngramsTypeFromTabType tabType
980 offset' = maybe 0 identity offset
981 listType' = maybe (const True) (==) listType
982 minSize' = maybe (const True) (<=) minSize
983 maxSize' = maybe (const True) (>=) maxSize
985 selected_node n = minSize' s
987 && searchQuery (n ^. ne_ngrams)
988 && listType' (n ^. ne_list)
992 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
994 ---------------------------------------
995 sortOnOrder Nothing = identity
996 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
997 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
998 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
999 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1001 ---------------------------------------
1002 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1003 selectAndPaginate tableMap = roots <> inners
1005 list = tableMap ^.. each
1006 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1008 selected_nodes = list & take limit_
1010 . filter selected_node
1011 . sortOnOrder orderBy
1012 roots = rootOf <$> selected_nodes
1013 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1014 inners = list & filter (selected_inner rootsSet)
1016 ---------------------------------------
1017 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1018 setScores False table = pure table
1019 setScores True table = do
1020 let ngrams_terms = (table ^.. each . ne_ngrams)
1022 occurrences <- getOccByNgramsOnlyFast nId
1026 liftIO $ hprint stderr
1027 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1028 (length ngrams_terms) t1 t2
1030 occurrences <- getOccByNgramsOnlySlow nType nId
1036 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1038 pure $ table & each %~ setOcc
1039 ---------------------------------------
1041 -- lists <- catMaybes <$> listsWith userMaster
1042 -- trace (show lists) $
1043 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1045 let nSco = needsScores orderBy
1046 tableMap1 <- getNgramsTableMap listId ngramsType
1048 tableMap2 <- tableMap1 & v_data %%~ setScores nSco
1049 . Map.mapWithKey ngramsElementFromRepo
1051 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1052 . setScores (not nSco)
1055 liftIO $ hprint stderr
1056 ("getTableNgrams total=" % timeSpecs
1057 % " map1=" % timeSpecs
1058 % " map2=" % timeSpecs
1059 % " map3=" % timeSpecs
1060 % " sql=" % (if nSco then "map2" else "map3")
1062 ) t0 t3 t0 t1 t1 t2 t2 t3
1068 -- TODO: find a better place for the code above, All APIs stay here
1069 type QueryParamR = QueryParam' '[Required, Strict]
1072 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1073 deriving (Generic, Enum, Bounded, Read, Show)
1075 instance FromHttpApiData OrderBy
1077 parseUrlPiece "TermAsc" = pure TermAsc
1078 parseUrlPiece "TermDesc" = pure TermDesc
1079 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1080 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1081 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1083 instance ToParamSchema OrderBy
1084 instance FromJSON OrderBy
1085 instance ToJSON OrderBy
1086 instance ToSchema OrderBy
1087 instance Arbitrary OrderBy
1089 arbitrary = elements [minBound..maxBound]
1091 needsScores :: Maybe OrderBy -> Bool
1092 needsScores (Just ScoreAsc) = True
1093 needsScores (Just ScoreDesc) = True
1094 needsScores _ = False
1096 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1097 :> QueryParamR "ngramsType" TabType
1098 :> QueryParamR "list" ListId
1099 :> QueryParamR "limit" Limit
1100 :> QueryParam "offset" Offset
1101 :> QueryParam "listType" ListType
1102 :> QueryParam "minTermSize" MinSize
1103 :> QueryParam "maxTermSize" MaxSize
1104 :> QueryParam "orderBy" OrderBy
1105 :> QueryParam "search" Text
1106 :> Get '[JSON] (Versioned NgramsTable)
1108 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1109 :> QueryParamR "ngramsType" TabType
1110 :> QueryParamR "list" ListId
1111 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1112 :> Put '[JSON] (Versioned NgramsTablePatch)
1114 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1115 :> QueryParamR "ngramsType" TabType
1116 :> QueryParamR "list" ListId
1117 :> QueryParam "listType" ListType
1118 :> ReqBody '[JSON] [NgramsTerm]
1121 type TableNgramsApi = TableNgramsApiGet
1122 :<|> TableNgramsApiPut
1123 :<|> TableNgramsApiPost
1125 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1126 => NodeId -> TabType
1127 -> ListId -> Limit -> Maybe Offset
1129 -> Maybe MinSize -> Maybe MaxSize
1131 -> Maybe Text -- full text search
1132 -> m (Versioned NgramsTable)
1133 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1134 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1136 searchQuery = maybe (const True) isInfixOf mt
1138 -- | Text search is deactivated for now for ngrams by doc only
1139 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1141 -> ListId -> Limit -> Maybe Offset
1143 -> Maybe MinSize -> Maybe MaxSize
1145 -> Maybe Text -- full text search
1146 -> m (Versioned NgramsTable)
1147 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1148 ns <- selectNodesWithUsername NodeList userMaster
1149 let ngramsType = ngramsTypeFromTabType tabType
1150 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1151 let searchQuery = flip S.member (S.fromList ngs)
1152 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1158 apiNgramsTableCorpus :: ( RepoCmdM env err m
1160 , HasInvalidError err
1163 => NodeId -> ServerT TableNgramsApi m
1164 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1166 :<|> tableNgramsPost
1169 apiNgramsTableDoc :: ( RepoCmdM env err m
1171 , HasInvalidError err
1174 => DocId -> ServerT TableNgramsApi m
1175 apiNgramsTableDoc dId = getTableNgramsDoc dId
1177 :<|> tableNgramsPost
1178 -- > add new ngrams in database (TODO AD)
1179 -- > index all the corpus accordingly (TODO AD)
1181 listNgramsChangedSince :: RepoCmdM env err m => ListId -> NgramsType -> Version -> m (Versioned Bool)
1182 listNgramsChangedSince listId ngramsType version
1184 Versioned <$> currentVersion <*> pure True
1186 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)