1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
44 , apiNgramsTableCorpus
64 , NgramsRepoElement(..)
73 , ngramsTypeFromTabType
89 -- import Debug.Trace (trace)
90 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
91 -- import Gargantext.Database.Schema.User (UserId)
92 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
93 Composable(..), Transformable(..),
94 PairPatch(..), Patched, ConflictResolution,
95 ConflictResolutionReplace, ours)
96 import qualified Data.Map.Strict.Patch as PM
98 import Data.Ord (Down(..))
100 --import Data.Semigroup
101 import Data.Set (Set)
102 import qualified Data.Set as S
103 import qualified Data.List as List
104 import Data.Maybe (fromMaybe)
105 -- import Data.Tuple.Extra (first)
106 import qualified Data.Map.Strict as Map
107 import Data.Map.Strict (Map)
108 import qualified Data.Set as Set
109 import Control.Category ((>>>))
110 import Control.Concurrent
111 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
112 import Control.Monad.Error.Class (MonadError)
113 import Control.Monad.Reader
114 import Control.Monad.State
115 import Data.Aeson hiding ((.=))
116 import Data.Aeson.TH (deriveJSON)
117 import Data.Either(Either(Left))
118 -- import Data.Map (lookup)
119 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
120 import Data.Swagger hiding (version, patch)
121 import Data.Text (Text, isInfixOf, count)
123 import Formatting (hprint, int, (%))
124 import Formatting.Clock (timeSpecs)
125 import GHC.Generics (Generic)
126 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
127 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
128 import Gargantext.Database.Config (userMaster)
129 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
130 import Gargantext.Database.Schema.Ngrams (NgramsType)
131 import Gargantext.Database.Types.Node (NodeType(..))
132 import Gargantext.Database.Utils (fromField', HasConnection)
133 import Gargantext.Database.Node.Select
134 import Gargantext.Database.Ngrams
135 --import Gargantext.Database.Lists (listsWith)
136 import Gargantext.Database.Schema.Node (HasNodeError)
137 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
138 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
139 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
140 import Gargantext.Prelude
141 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
142 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
143 import Servant hiding (Patch)
144 import System.Clock (getTime, TimeSpec, Clock(..))
145 import System.FileLock (FileLock)
146 import System.IO (stderr)
147 import Test.QuickCheck (elements)
148 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
153 instance ToSchema TODO where
154 instance ToParamSchema TODO where
156 ------------------------------------------------------------------------
157 --data FacetFormat = Table | Chart
158 data TabType = Docs | Trash | MoreFav | MoreTrash
159 | Terms | Sources | Authors | Institutes
161 deriving (Generic, Enum, Bounded, Show)
163 instance FromHttpApiData TabType
165 parseUrlPiece "Docs" = pure Docs
166 parseUrlPiece "Trash" = pure Trash
167 parseUrlPiece "MoreFav" = pure MoreFav
168 parseUrlPiece "MoreTrash" = pure MoreTrash
170 parseUrlPiece "Terms" = pure Terms
171 parseUrlPiece "Sources" = pure Sources
172 parseUrlPiece "Institutes" = pure Institutes
173 parseUrlPiece "Authors" = pure Authors
175 parseUrlPiece "Contacts" = pure Contacts
177 parseUrlPiece _ = Left "Unexpected value of TabType"
179 instance ToParamSchema TabType
180 instance ToJSON TabType
181 instance FromJSON TabType
182 instance ToSchema TabType
183 instance Arbitrary TabType
185 arbitrary = elements [minBound .. maxBound]
187 newtype MSet a = MSet (Map a ())
188 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
190 instance ToJSON a => ToJSON (MSet a) where
191 toJSON (MSet m) = toJSON (Map.keys m)
192 toEncoding (MSet m) = toEncoding (Map.keys m)
194 mSetFromSet :: Set a -> MSet a
195 mSetFromSet = MSet . Map.fromSet (const ())
197 mSetFromList :: Ord a => [a] -> MSet a
198 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
200 -- mSetToSet :: Ord a => MSet a -> Set a
201 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
202 mSetToSet :: Ord a => MSet a -> Set a
203 mSetToSet = Set.fromList . mSetToList
205 mSetToList :: MSet a -> [a]
206 mSetToList (MSet a) = Map.keys a
208 instance Foldable MSet where
209 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
211 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
212 parseJSON = fmap mSetFromList . parseJSON
214 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
216 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
218 ------------------------------------------------------------------------
219 type NgramsTerm = Text
221 data RootParent = RootParent
222 { _rp_root :: NgramsTerm
223 , _rp_parent :: NgramsTerm
225 deriving (Ord, Eq, Show, Generic)
227 deriveJSON (unPrefix "_rp_") ''RootParent
228 makeLenses ''RootParent
230 data NgramsRepoElement = NgramsRepoElement
232 , _nre_list :: ListType
233 --, _nre_root_parent :: Maybe RootParent
234 , _nre_root :: Maybe NgramsTerm
235 , _nre_parent :: Maybe NgramsTerm
236 , _nre_children :: MSet NgramsTerm
238 deriving (Ord, Eq, Show, Generic)
240 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
241 makeLenses ''NgramsRepoElement
244 NgramsElement { _ne_ngrams :: NgramsTerm
246 , _ne_list :: ListType
247 , _ne_occurrences :: Int
248 , _ne_root :: Maybe NgramsTerm
249 , _ne_parent :: Maybe NgramsTerm
250 , _ne_children :: MSet NgramsTerm
252 deriving (Ord, Eq, Show, Generic)
254 deriveJSON (unPrefix "_ne_") ''NgramsElement
255 makeLenses ''NgramsElement
257 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
258 mkNgramsElement ngrams list rp children =
259 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
262 size = 1 + count " " ngrams
264 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
265 newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
267 instance ToSchema NgramsElement where
268 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
269 instance Arbitrary NgramsElement where
270 arbitrary = elements [newNgramsElement Nothing "sport"]
272 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
274 (NgramsElement { _ne_size = s
288 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
289 ngramsElementFromRepo
298 NgramsElement { _ne_size = s
303 , _ne_ngrams = ngrams
304 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
306 -- Here we could use 0 if we want to avoid any `panic`.
307 -- It will not happen using getTableNgrams if
308 -- getOccByNgramsOnly provides a count of occurrences for
309 -- all the ngrams given.
313 ------------------------------------------------------------------------
314 newtype NgramsTable = NgramsTable [NgramsElement]
315 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
317 type ListNgrams = NgramsTable
319 makePrisms ''NgramsTable
321 -- | Question: why these repetition of Type in this instance
322 -- may you document it please ?
323 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
324 each = _NgramsTable . each
327 -- | TODO Check N and Weight
329 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
330 toNgramsElement ns = map toNgramsElement' ns
332 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
336 Just x -> lookup x mapParent
337 c' = maybe mempty identity $ lookup t mapChildren
338 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
340 mapParent :: Map Int Text
341 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
343 mapChildren :: Map Text (Set Text)
344 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
345 $ Map.fromListWith (<>)
346 $ map (first fromJust)
347 $ filter (isJust . fst)
348 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
351 mockTable :: NgramsTable
352 mockTable = NgramsTable
353 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
354 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
355 , mkNgramsElement "cats" StopTerm Nothing mempty
356 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
357 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
358 , mkNgramsElement "fox" GraphTerm Nothing mempty
359 , mkNgramsElement "object" CandidateTerm Nothing mempty
360 , mkNgramsElement "nothing" StopTerm Nothing mempty
361 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
362 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
363 , mkNgramsElement "moon" CandidateTerm Nothing mempty
364 , mkNgramsElement "sky" StopTerm Nothing mempty
367 rp n = Just $ RootParent n n
369 instance Arbitrary NgramsTable where
370 arbitrary = pure mockTable
372 instance ToSchema NgramsTable
374 ------------------------------------------------------------------------
375 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
377 ------------------------------------------------------------------------
378 -- On the Client side:
379 --data Action = InGroup NgramsId NgramsId
380 -- | OutGroup NgramsId NgramsId
381 -- | SetListType NgramsId ListType
383 data PatchSet a = PatchSet
387 deriving (Eq, Ord, Show, Generic)
389 makeLenses ''PatchSet
390 makePrisms ''PatchSet
392 instance ToJSON a => ToJSON (PatchSet a) where
393 toJSON = genericToJSON $ unPrefix "_"
394 toEncoding = genericToEncoding $ unPrefix "_"
396 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
397 parseJSON = genericParseJSON $ unPrefix "_"
400 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
401 arbitrary = PatchSet <$> arbitrary <*> arbitrary
403 type instance Patched (PatchSet a) = Set a
405 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
406 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
408 instance Ord a => Semigroup (PatchSet a) where
409 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
410 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
413 instance Ord a => Monoid (PatchSet a) where
414 mempty = PatchSet mempty mempty
416 instance Ord a => Group (PatchSet a) where
417 invert (PatchSet r a) = PatchSet a r
419 instance Ord a => Composable (PatchSet a) where
420 composable _ _ = undefined
422 instance Ord a => Action (PatchSet a) (Set a) where
423 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
425 instance Applicable (PatchSet a) (Set a) where
426 applicable _ _ = mempty
428 instance Ord a => Validity (PatchSet a) where
429 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
431 instance Ord a => Transformable (PatchSet a) where
432 transformable = undefined
434 conflicts _p _q = undefined
436 transformWith conflict p q = undefined conflict p q
438 instance ToSchema a => ToSchema (PatchSet a)
441 type AddRem = Replace (Maybe ())
443 remPatch, addPatch :: AddRem
444 remPatch = replace (Just ()) Nothing
445 addPatch = replace Nothing (Just ())
447 isRem :: Replace (Maybe ()) -> Bool
448 isRem = (== remPatch)
450 type PatchMap = PM.PatchMap
452 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
453 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
454 Transformable, Composable)
456 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
457 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
459 -- TODO this breaks module abstraction
460 makePrisms ''PM.PatchMap
462 makePrisms ''PatchMSet
464 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
465 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
467 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
468 f = Map.partition isRem >>> both %~ Map.keysSet
470 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
471 g (rems, adds) = Map.fromSet (const remPatch) rems
472 <> Map.fromSet (const addPatch) adds
474 instance Ord a => Action (PatchMSet a) (MSet a) where
475 act (PatchMSet p) (MSet m) = MSet $ act p m
477 instance Ord a => Applicable (PatchMSet a) (MSet a) where
478 applicable (PatchMSet p) (MSet m) = applicable p m
480 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
481 toJSON = toJSON . view _PatchMSetIso
482 toEncoding = toEncoding . view _PatchMSetIso
484 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
485 parseJSON = fmap (_PatchMSetIso #) . parseJSON
487 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
488 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
490 instance ToSchema a => ToSchema (PatchMSet a) where
492 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
494 type instance Patched (PatchMSet a) = MSet a
496 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
497 arbitrary = uncurry replace <$> arbitrary
498 -- If they happen to be equal then the patch is Keep.
500 instance ToSchema a => ToSchema (Replace a) where
501 declareNamedSchema (_ :: Proxy (Replace a)) = do
502 -- TODO Keep constructor is not supported here.
503 aSchema <- declareSchemaRef (Proxy :: Proxy a)
504 return $ NamedSchema (Just "Replace") $ mempty
505 & type_ ?~ SwaggerObject
507 InsOrdHashMap.fromList
511 & required .~ [ "old", "new" ]
514 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
515 , _patch_list :: Replace ListType -- TODO Map UserId ListType
517 deriving (Eq, Show, Generic)
519 deriveJSON (unPrefix "_") ''NgramsPatch
520 makeLenses ''NgramsPatch
522 instance ToSchema NgramsPatch where
523 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
525 instance Arbitrary NgramsPatch where
526 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
528 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
530 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
531 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
533 instance Semigroup NgramsPatch where
534 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
536 instance Monoid NgramsPatch where
537 mempty = _NgramsPatch # mempty
539 instance Validity NgramsPatch where
540 validate p = p ^. _NgramsPatch . to validate
542 instance Transformable NgramsPatch where
543 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
545 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
547 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
549 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
551 type ConflictResolutionNgramsPatch =
552 ( ConflictResolutionPatchMSet NgramsTerm
553 , ConflictResolutionReplace ListType
555 type instance ConflictResolution NgramsPatch =
556 ConflictResolutionNgramsPatch
558 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
559 -- ~ Patched NgramsPatchIso
560 type instance Patched NgramsPatch = PatchedNgramsPatch
562 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
563 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
564 applicable p (Just nre) =
565 applicable (p ^. patch_children) (nre ^. nre_children) <>
566 applicable (p ^. patch_list) (nre ^. nre_list)
568 instance Action NgramsPatch NgramsRepoElement where
569 act p = (nre_children %~ act (p ^. patch_children))
570 . (nre_list %~ act (p ^. patch_list))
572 instance Action NgramsPatch (Maybe NgramsRepoElement) where
575 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
576 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
578 instance FromField NgramsTablePatch
580 fromField = fromField'
582 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
584 fromField = fromField'
586 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
588 type instance ConflictResolution NgramsTablePatch =
589 NgramsTerm -> ConflictResolutionNgramsPatch
591 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
592 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
593 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
595 makePrisms ''NgramsTablePatch
596 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
597 instance ToSchema NgramsTablePatch
599 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
600 applicable p = applicable (p ^. _NgramsTablePatch)
602 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
604 fmap (execState (reParentNgramsTablePatch p)) .
605 act (p ^. _NgramsTablePatch)
607 instance Arbitrary NgramsTablePatch where
608 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
610 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
611 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
612 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
614 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
616 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
617 reRootChildren root ngram = do
618 nre <- use $ at ngram
619 forOf_ (_Just . nre_children . folded) nre $ \child -> do
620 at child . _Just . nre_root ?= root
621 reRootChildren root child
623 reParent :: Maybe RootParent -> ReParent NgramsTerm
624 reParent rp child = do
625 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
626 . (nre_root .~ (_rp_root <$> rp))
628 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
630 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
631 reParentAddRem rp child p =
632 reParent (if isRem p then Nothing else Just rp) child
634 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
635 reParentNgramsPatch parent ngramsPatch = do
636 root_of_parent <- use (at parent . _Just . nre_root)
638 root = fromMaybe parent root_of_parent
639 rp = RootParent { _rp_root = root, _rp_parent = parent }
640 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
641 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
643 reParentNgramsTablePatch :: ReParent NgramsTablePatch
644 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
645 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
647 ------------------------------------------------------------------------
648 ------------------------------------------------------------------------
651 data Versioned a = Versioned
652 { _v_version :: Version
655 deriving (Generic, Show)
656 deriveJSON (unPrefix "_v_") ''Versioned
657 makeLenses ''Versioned
658 instance ToSchema a => ToSchema (Versioned a) where
659 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
660 instance Arbitrary a => Arbitrary (Versioned a) where
661 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
664 -- TODO sequencs of modifications (Patchs)
665 type NgramsIdPatch = Patch NgramsId NgramsPatch
667 ngramsPatch :: Int -> NgramsPatch
668 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
670 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
671 toEdit n p = Edit n p
672 ngramsIdPatch :: Patch NgramsId NgramsPatch
673 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
674 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
675 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
678 -- applyPatchBack :: Patch -> IO Patch
679 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
681 ------------------------------------------------------------------------
682 ------------------------------------------------------------------------
683 ------------------------------------------------------------------------
686 -- TODO: Replace.old is ignored which means that if the current list
687 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
688 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
689 -- However this should not happen in non conflicting situations.
690 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
691 mkListsUpdate nt patches =
692 [ (ngramsTypeId nt, ng, listTypeId lt)
693 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
694 , lt <- patch ^.. patch_list . new
697 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
700 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
701 mkChildrenGroups addOrRem nt patches =
702 [ (ngramsTypeId nt, parent, child)
703 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
704 , child <- patch ^.. patch_children . to addOrRem . folded
708 ngramsTypeFromTabType :: TabType -> NgramsType
709 ngramsTypeFromTabType tabType =
710 let lieu = "Garg.API.Ngrams: " :: Text in
712 Sources -> Ngrams.Sources
713 Authors -> Ngrams.Authors
714 Institutes -> Ngrams.Institutes
715 Terms -> Ngrams.NgramsTerms
716 _ -> panic $ lieu <> "No Ngrams for this tab"
717 -- TODO: This `panic` would disapear with custom NgramsType.
719 ------------------------------------------------------------------------
721 { _r_version :: Version
724 -- first patch in the list is the most recent
728 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
729 parseJSON = genericParseJSON $ unPrefix "_r_"
731 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
732 toJSON = genericToJSON $ unPrefix "_r_"
733 toEncoding = genericToEncoding $ unPrefix "_r_"
737 initRepo :: Monoid s => Repo s p
738 initRepo = Repo 1 mempty []
740 type NgramsRepo = Repo NgramsState NgramsStatePatch
741 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
742 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
744 initMockRepo :: NgramsRepo
745 initMockRepo = Repo 1 s []
747 s = Map.singleton Ngrams.NgramsTerms
748 $ Map.singleton 47254
750 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
752 data RepoEnv = RepoEnv
753 { _renv_var :: !(MVar NgramsRepo)
754 , _renv_saver :: !(IO ())
755 , _renv_lock :: !FileLock
761 class HasRepoVar env where
762 repoVar :: Getter env (MVar NgramsRepo)
764 instance HasRepoVar (MVar NgramsRepo) where
767 class HasRepoSaver env where
768 repoSaver :: Getter env (IO ())
770 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
771 repoEnv :: Getter env RepoEnv
773 instance HasRepo RepoEnv where
776 instance HasRepoVar RepoEnv where
779 instance HasRepoSaver RepoEnv where
780 repoSaver = renv_saver
782 type RepoCmdM env err m =
788 ------------------------------------------------------------------------
790 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
792 saveRepo = liftIO =<< view repoSaver
794 listTypeConflictResolution :: ListType -> ListType -> ListType
795 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
797 ngramsStatePatchConflictResolution
798 :: NgramsType -> NodeId -> NgramsTerm
799 -> ConflictResolutionNgramsPatch
800 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
802 -- undefined {- TODO think this through -}, listTypeConflictResolution)
805 -- Insertions are not considered as patches,
806 -- they do not extend history,
807 -- they do not bump version.
808 insertNewOnly :: a -> Maybe b -> a
809 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
810 -- TODO error handling
812 something :: Monoid a => Maybe a -> a
813 something Nothing = mempty
814 something (Just a) = a
817 -- TODO refactor with putListNgrams
818 copyListNgrams :: RepoCmdM env err m
819 => NodeId -> NodeId -> NgramsType
821 copyListNgrams srcListId dstListId ngramsType = do
823 liftIO $ modifyMVar_ var $
824 pure . (r_state . at ngramsType %~ (Just . f . something))
827 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
828 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
830 -- TODO refactor with putListNgrams
831 -- The list must be non-empty!
832 -- The added ngrams must be non-existent!
833 addListNgrams :: RepoCmdM env err m
834 => NodeId -> NgramsType
835 -> [NgramsElement] -> m ()
836 addListNgrams listId ngramsType nes = do
838 liftIO $ modifyMVar_ var $
839 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
842 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
845 -- If the given list of ngrams elements contains ngrams already in
846 -- the repo, they will be ignored.
847 putListNgrams :: RepoCmdM env err m
848 => NodeId -> NgramsType
849 -> [NgramsElement] -> m ()
850 putListNgrams _ _ [] = pure ()
851 putListNgrams listId ngramsType nes = do
852 -- printDebug "putListNgrams" (length nes)
854 liftIO $ modifyMVar_ var $
855 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (<> m) . something)) . something))
858 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
861 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
862 tableNgramsPost tabType listId mayList =
863 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
865 currentVersion :: RepoCmdM env err m => m Version
868 r <- liftIO $ readMVar var
869 pure $ r ^. r_version
871 tableNgramsPull :: RepoCmdM env err m
872 => ListId -> NgramsType
874 -> m (Versioned NgramsTablePatch)
875 tableNgramsPull listId ngramsType p_version = do
877 r <- liftIO $ readMVar var
880 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
881 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
883 pure (Versioned (r ^. r_version) q_table)
885 -- Apply the given patch to the DB and returns the patch to be applied on the
888 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
890 -> Versioned NgramsTablePatch
891 -> m (Versioned NgramsTablePatch)
892 tableNgramsPut tabType listId (Versioned p_version p_table)
893 | p_table == mempty = do
894 let ngramsType = ngramsTypeFromTabType tabType
895 tableNgramsPull listId ngramsType p_version
898 let ngramsType = ngramsTypeFromTabType tabType
899 (p0, p0_validity) = PM.singleton listId p_table
900 (p, p_validity) = PM.singleton ngramsType p0
902 assertValid p0_validity
903 assertValid p_validity
906 vq' <- liftIO $ modifyMVar var $ \r -> do
908 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
909 (p', q') = transformWith ngramsStatePatchConflictResolution p q
910 r' = r & r_version +~ 1
912 & r_history %~ (p' :)
913 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
915 -- Ideally we would like to check these properties. However:
916 -- * They should be checked only to debug the code. The client data
917 -- should be able to trigger these.
918 -- * What kind of error should they throw (we are in IO here)?
919 -- * Should we keep modifyMVar?
920 -- * Should we throw the validation in an Exception, catch it around
921 -- modifyMVar and throw it back as an Error?
922 assertValid $ transformable p q
923 assertValid $ applicable p' (r ^. r_state)
925 pure (r', Versioned (r' ^. r_version) q'_table)
930 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
931 mergeNgramsElement _neOld neNew = neNew
933 { _ne_list :: ListType
934 If we merge the parents/children we can potentially create cycles!
935 , _ne_parent :: Maybe NgramsTerm
936 , _ne_children :: MSet NgramsTerm
940 getNgramsTableMap :: RepoCmdM env err m
941 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
942 getNgramsTableMap nodeId ngramsType = do
944 repo <- liftIO $ readMVar v
945 pure $ Versioned (repo ^. r_version)
946 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
951 -- | TODO Errors management
952 -- TODO: polymorphic for Annuaire or Corpus or ...
953 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
954 -- TODO: should take only one ListId
956 getTime' :: MonadIO m => m TimeSpec
957 getTime' = liftIO $ getTime ProcessCPUTime
960 getTableNgrams :: forall env err m.
961 (RepoCmdM env err m, HasNodeError err, HasConnection env)
962 => NodeType -> NodeId -> TabType
963 -> ListId -> Limit -> Maybe Offset
965 -> Maybe MinSize -> Maybe MaxSize
967 -> (NgramsTerm -> Bool)
968 -> m (Versioned NgramsTable)
969 getTableNgrams _nType nId tabType listId limit_ offset
970 listType minSize maxSize orderBy searchQuery = do
973 -- lIds <- selectNodesWithUsername NodeList userMaster
975 ngramsType = ngramsTypeFromTabType tabType
976 offset' = maybe 0 identity offset
977 listType' = maybe (const True) (==) listType
978 minSize' = maybe (const True) (<=) minSize
979 maxSize' = maybe (const True) (>=) maxSize
981 selected_node n = minSize' s
983 && searchQuery (n ^. ne_ngrams)
984 && listType' (n ^. ne_list)
988 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
990 ---------------------------------------
991 sortOnOrder Nothing = identity
992 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
993 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
994 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
995 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
997 ---------------------------------------
998 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
999 selectAndPaginate tableMap = roots <> inners
1001 list = tableMap ^.. each
1002 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1004 selected_nodes = list & take limit_
1006 . filter selected_node
1007 . sortOnOrder orderBy
1008 roots = rootOf <$> selected_nodes
1009 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1010 inners = list & filter (selected_inner rootsSet)
1012 ---------------------------------------
1013 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1014 setScores False table = pure table
1015 setScores True table = do
1016 let ngrams_terms = (table ^.. each . ne_ngrams)
1018 occurrences <- getOccByNgramsOnlyFast nId
1022 liftIO $ hprint stderr
1023 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1024 (length ngrams_terms) t1 t2
1026 occurrences <- getOccByNgramsOnlySlow nType nId
1032 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1034 pure $ table & each %~ setOcc
1035 ---------------------------------------
1037 -- lists <- catMaybes <$> listsWith userMaster
1038 -- trace (show lists) $
1039 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1041 let nSco = needsScores orderBy
1042 tableMap1 <- getNgramsTableMap listId ngramsType
1044 tableMap2 <- tableMap1 & v_data %%~ setScores nSco
1045 . Map.mapWithKey ngramsElementFromRepo
1047 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1048 . setScores (not nSco)
1051 liftIO $ hprint stderr
1052 ("getTableNgrams total=" % timeSpecs
1053 % " map1=" % timeSpecs
1054 % " map2=" % timeSpecs
1055 % " map3=" % timeSpecs
1056 % " sql=" % if nSco then "map2" else "map3"
1058 ) t0 t3 t0 t1 t1 t2 t2 t3
1064 -- TODO: find a better place for the code above, All APIs stay here
1065 type QueryParamR = QueryParam' '[Required, Strict]
1068 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1069 deriving (Generic, Enum, Bounded, Read, Show)
1071 instance FromHttpApiData OrderBy
1073 parseUrlPiece "TermAsc" = pure TermAsc
1074 parseUrlPiece "TermDesc" = pure TermDesc
1075 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1076 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1077 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1079 instance ToParamSchema OrderBy
1080 instance FromJSON OrderBy
1081 instance ToJSON OrderBy
1082 instance ToSchema OrderBy
1083 instance Arbitrary OrderBy
1085 arbitrary = elements [minBound..maxBound]
1087 needsScores :: Maybe OrderBy -> Bool
1088 needsScores (Just ScoreAsc) = True
1089 needsScores (Just ScoreDesc) = True
1090 needsScores _ = False
1092 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1093 :> QueryParamR "ngramsType" TabType
1094 :> QueryParamR "list" ListId
1095 :> QueryParamR "limit" Limit
1096 :> QueryParam "offset" Offset
1097 :> QueryParam "listType" ListType
1098 :> QueryParam "minTermSize" MinSize
1099 :> QueryParam "maxTermSize" MaxSize
1100 :> QueryParam "orderBy" OrderBy
1101 :> QueryParam "search" Text
1102 :> Get '[JSON] (Versioned NgramsTable)
1104 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1105 :> QueryParamR "ngramsType" TabType
1106 :> QueryParamR "list" ListId
1107 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1108 :> Put '[JSON] (Versioned NgramsTablePatch)
1110 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1111 :> QueryParamR "ngramsType" TabType
1112 :> QueryParamR "list" ListId
1113 :> QueryParam "listType" ListType
1114 :> ReqBody '[JSON] [NgramsTerm]
1117 type TableNgramsApi = TableNgramsApiGet
1118 :<|> TableNgramsApiPut
1119 :<|> TableNgramsApiPost
1121 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1122 => NodeId -> TabType
1123 -> ListId -> Limit -> Maybe Offset
1125 -> Maybe MinSize -> Maybe MaxSize
1127 -> Maybe Text -- full text search
1128 -> m (Versioned NgramsTable)
1129 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1130 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1132 searchQuery = maybe (const True) isInfixOf mt
1134 -- | Text search is deactivated for now for ngrams by doc only
1135 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1137 -> ListId -> Limit -> Maybe Offset
1139 -> Maybe MinSize -> Maybe MaxSize
1141 -> Maybe Text -- full text search
1142 -> m (Versioned NgramsTable)
1143 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1144 ns <- selectNodesWithUsername NodeList userMaster
1145 let ngramsType = ngramsTypeFromTabType tabType
1146 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1147 let searchQuery = flip S.member (S.fromList ngs)
1148 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1154 apiNgramsTableCorpus :: ( RepoCmdM env err m
1156 , HasInvalidError err
1159 => NodeId -> ServerT TableNgramsApi m
1160 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1162 :<|> tableNgramsPost
1165 apiNgramsTableDoc :: ( RepoCmdM env err m
1167 , HasInvalidError err
1170 => DocId -> ServerT TableNgramsApi m
1171 apiNgramsTableDoc dId = getTableNgramsDoc dId
1173 :<|> tableNgramsPost
1174 -- > add new ngrams in database (TODO AD)
1175 -- > index all the corpus accordingly (TODO AD)
1177 listNgramsChangedSince :: RepoCmdM env err m => ListId -> NgramsType -> Version -> m (Versioned Bool)
1178 listNgramsChangedSince listId ngramsType version
1180 Versioned <$> currentVersion <*> pure True
1182 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)