1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
45 , apiNgramsTableCorpus
67 , NgramsRepoElement(..)
76 , ngramsTypeFromTabType
93 , listNgramsChangedSince
97 -- import Debug.Trace (trace)
98 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
99 -- import Gargantext.Database.Schema.User (UserId)
100 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
101 Composable(..), Transformable(..),
102 PairPatch(..), Patched, ConflictResolution,
103 ConflictResolutionReplace, ours)
104 import qualified Data.Map.Strict.Patch as PM
106 import Data.Ord (Down(..))
108 --import Data.Semigroup
109 import Data.Set (Set)
110 import qualified Data.Set as S
111 import qualified Data.List as List
112 import Data.Maybe (fromMaybe)
113 -- import Data.Tuple.Extra (first)
114 import qualified Data.Map.Strict as Map
115 import Data.Map.Strict (Map)
116 import qualified Data.Set as Set
117 import Control.Category ((>>>))
118 import Control.Concurrent
119 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
120 import Control.Monad.Error.Class (MonadError)
121 import Control.Monad.Reader
122 import Control.Monad.State
123 import Data.Aeson hiding ((.=))
124 import Data.Aeson.TH (deriveJSON)
125 import Data.Either(Either(Left))
126 -- import Data.Map (lookup)
127 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
128 import Data.Swagger hiding (version, patch)
129 import Data.Text (Text, isInfixOf, count)
131 import Formatting (hprint, int, (%))
132 import Formatting.Clock (timeSpecs)
133 import GHC.Generics (Generic)
134 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
135 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
136 import Gargantext.Database.Config (userMaster)
137 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
138 import Gargantext.Database.Schema.Ngrams (NgramsType)
139 import Gargantext.Database.Types.Node (NodeType(..))
140 import Gargantext.Database.Utils (fromField', HasConnection)
141 import Gargantext.Database.Node.Select
142 import Gargantext.Database.Ngrams
143 --import Gargantext.Database.Lists (listsWith)
144 import Gargantext.Database.Schema.Node (HasNodeError)
145 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
146 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
147 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
148 import Gargantext.Prelude
149 import Gargantext.Core.Types (TODO)
150 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
151 import Servant hiding (Patch)
152 import System.Clock (getTime, TimeSpec, Clock(..))
153 import System.FileLock (FileLock)
154 import System.IO (stderr)
155 import Test.QuickCheck (elements)
156 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
158 ------------------------------------------------------------------------
159 --data FacetFormat = Table | Chart
160 data TabType = Docs | Trash | MoreFav | MoreTrash
161 | Terms | Sources | Authors | Institutes
163 deriving (Generic, Enum, Bounded, Show)
165 instance FromHttpApiData TabType
167 parseUrlPiece "Docs" = pure Docs
168 parseUrlPiece "Trash" = pure Trash
169 parseUrlPiece "MoreFav" = pure MoreFav
170 parseUrlPiece "MoreTrash" = pure MoreTrash
172 parseUrlPiece "Terms" = pure Terms
173 parseUrlPiece "Sources" = pure Sources
174 parseUrlPiece "Institutes" = pure Institutes
175 parseUrlPiece "Authors" = pure Authors
177 parseUrlPiece "Contacts" = pure Contacts
179 parseUrlPiece _ = Left "Unexpected value of TabType"
181 instance ToParamSchema TabType
182 instance ToJSON TabType
183 instance FromJSON TabType
184 instance ToSchema TabType
185 instance Arbitrary TabType
187 arbitrary = elements [minBound .. maxBound]
189 newtype MSet a = MSet (Map a ())
190 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
192 instance ToJSON a => ToJSON (MSet a) where
193 toJSON (MSet m) = toJSON (Map.keys m)
194 toEncoding (MSet m) = toEncoding (Map.keys m)
196 mSetFromSet :: Set a -> MSet a
197 mSetFromSet = MSet . Map.fromSet (const ())
199 mSetFromList :: Ord a => [a] -> MSet a
200 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
202 -- mSetToSet :: Ord a => MSet a -> Set a
203 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
204 mSetToSet :: Ord a => MSet a -> Set a
205 mSetToSet = Set.fromList . mSetToList
207 mSetToList :: MSet a -> [a]
208 mSetToList (MSet a) = Map.keys a
210 instance Foldable MSet where
211 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
213 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
214 parseJSON = fmap mSetFromList . parseJSON
216 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
218 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
220 ------------------------------------------------------------------------
221 type NgramsTerm = Text
223 data RootParent = RootParent
224 { _rp_root :: NgramsTerm
225 , _rp_parent :: NgramsTerm
227 deriving (Ord, Eq, Show, Generic)
229 deriveJSON (unPrefix "_rp_") ''RootParent
230 makeLenses ''RootParent
232 data NgramsRepoElement = NgramsRepoElement
234 , _nre_list :: ListType
235 --, _nre_root_parent :: Maybe RootParent
236 , _nre_root :: Maybe NgramsTerm
237 , _nre_parent :: Maybe NgramsTerm
238 , _nre_children :: MSet NgramsTerm
240 deriving (Ord, Eq, Show, Generic)
242 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
243 makeLenses ''NgramsRepoElement
245 instance ToSchema NgramsRepoElement where
246 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
250 NgramsElement { _ne_ngrams :: NgramsTerm
252 , _ne_list :: ListType
253 , _ne_occurrences :: Int
254 , _ne_root :: Maybe NgramsTerm
255 , _ne_parent :: Maybe NgramsTerm
256 , _ne_children :: MSet NgramsTerm
258 deriving (Ord, Eq, Show, Generic)
260 deriveJSON (unPrefix "_ne_") ''NgramsElement
261 makeLenses ''NgramsElement
263 mkNgramsElement :: NgramsTerm
268 mkNgramsElement ngrams list rp children =
269 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
272 size = 1 + count " " ngrams
274 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
275 newNgramsElement mayList ngrams =
276 mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
278 instance ToSchema NgramsElement where
279 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
280 instance Arbitrary NgramsElement where
281 arbitrary = elements [newNgramsElement Nothing "sport"]
283 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
285 (NgramsElement { _ne_size = s
299 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
300 ngramsElementFromRepo
309 NgramsElement { _ne_size = s
314 , _ne_ngrams = ngrams
315 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
317 -- Here we could use 0 if we want to avoid any `panic`.
318 -- It will not happen using getTableNgrams if
319 -- getOccByNgramsOnly provides a count of occurrences for
320 -- all the ngrams given.
324 ------------------------------------------------------------------------
325 newtype NgramsTable = NgramsTable [NgramsElement]
326 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
328 type NgramsList = NgramsTable
330 makePrisms ''NgramsTable
332 -- | Question: why these repetition of Type in this instance
333 -- may you document it please ?
334 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
335 each = _NgramsTable . each
338 -- | TODO Check N and Weight
340 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
341 toNgramsElement ns = map toNgramsElement' ns
343 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
347 Just x -> lookup x mapParent
348 c' = maybe mempty identity $ lookup t mapChildren
349 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
351 mapParent :: Map Int Text
352 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
354 mapChildren :: Map Text (Set Text)
355 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
356 $ Map.fromListWith (<>)
357 $ map (first fromJust)
358 $ filter (isJust . fst)
359 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
362 mockTable :: NgramsTable
363 mockTable = NgramsTable
364 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
365 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
366 , mkNgramsElement "cats" StopTerm Nothing mempty
367 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
368 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
369 , mkNgramsElement "fox" GraphTerm Nothing mempty
370 , mkNgramsElement "object" CandidateTerm Nothing mempty
371 , mkNgramsElement "nothing" StopTerm Nothing mempty
372 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
373 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
374 , mkNgramsElement "moon" CandidateTerm Nothing mempty
375 , mkNgramsElement "sky" StopTerm Nothing mempty
378 rp n = Just $ RootParent n n
380 instance Arbitrary NgramsTable where
381 arbitrary = pure mockTable
383 instance ToSchema NgramsTable
385 ------------------------------------------------------------------------
386 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
387 ------------------------------------------------------------------------
388 -- On the Client side:
389 --data Action = InGroup NgramsId NgramsId
390 -- | OutGroup NgramsId NgramsId
391 -- | SetListType NgramsId ListType
393 data PatchSet a = PatchSet
397 deriving (Eq, Ord, Show, Generic)
399 makeLenses ''PatchSet
400 makePrisms ''PatchSet
402 instance ToJSON a => ToJSON (PatchSet a) where
403 toJSON = genericToJSON $ unPrefix "_"
404 toEncoding = genericToEncoding $ unPrefix "_"
406 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
407 parseJSON = genericParseJSON $ unPrefix "_"
410 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
411 arbitrary = PatchSet <$> arbitrary <*> arbitrary
413 type instance Patched (PatchSet a) = Set a
415 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
416 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
418 instance Ord a => Semigroup (PatchSet a) where
419 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
420 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
423 instance Ord a => Monoid (PatchSet a) where
424 mempty = PatchSet mempty mempty
426 instance Ord a => Group (PatchSet a) where
427 invert (PatchSet r a) = PatchSet a r
429 instance Ord a => Composable (PatchSet a) where
430 composable _ _ = undefined
432 instance Ord a => Action (PatchSet a) (Set a) where
433 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
435 instance Applicable (PatchSet a) (Set a) where
436 applicable _ _ = mempty
438 instance Ord a => Validity (PatchSet a) where
439 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
441 instance Ord a => Transformable (PatchSet a) where
442 transformable = undefined
444 conflicts _p _q = undefined
446 transformWith conflict p q = undefined conflict p q
448 instance ToSchema a => ToSchema (PatchSet a)
451 type AddRem = Replace (Maybe ())
453 remPatch, addPatch :: AddRem
454 remPatch = replace (Just ()) Nothing
455 addPatch = replace Nothing (Just ())
457 isRem :: Replace (Maybe ()) -> Bool
458 isRem = (== remPatch)
460 type PatchMap = PM.PatchMap
462 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
463 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
464 Transformable, Composable)
466 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
467 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
469 -- TODO this breaks module abstraction
470 makePrisms ''PM.PatchMap
472 makePrisms ''PatchMSet
474 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
475 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
477 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
478 f = Map.partition isRem >>> both %~ Map.keysSet
480 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
481 g (rems, adds) = Map.fromSet (const remPatch) rems
482 <> Map.fromSet (const addPatch) adds
484 instance Ord a => Action (PatchMSet a) (MSet a) where
485 act (PatchMSet p) (MSet m) = MSet $ act p m
487 instance Ord a => Applicable (PatchMSet a) (MSet a) where
488 applicable (PatchMSet p) (MSet m) = applicable p m
490 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
491 toJSON = toJSON . view _PatchMSetIso
492 toEncoding = toEncoding . view _PatchMSetIso
494 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
495 parseJSON = fmap (_PatchMSetIso #) . parseJSON
497 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
498 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
500 instance ToSchema a => ToSchema (PatchMSet a) where
502 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
504 type instance Patched (PatchMSet a) = MSet a
506 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
507 arbitrary = uncurry replace <$> arbitrary
508 -- If they happen to be equal then the patch is Keep.
510 instance ToSchema a => ToSchema (Replace a) where
511 declareNamedSchema (_ :: Proxy (Replace a)) = do
512 -- TODO Keep constructor is not supported here.
513 aSchema <- declareSchemaRef (Proxy :: Proxy a)
514 return $ NamedSchema (Just "Replace") $ mempty
515 & type_ ?~ SwaggerObject
517 InsOrdHashMap.fromList
521 & required .~ [ "old", "new" ]
524 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
525 , _patch_list :: Replace ListType -- TODO Map UserId ListType
527 deriving (Eq, Show, Generic)
529 deriveJSON (unPrefix "_") ''NgramsPatch
530 makeLenses ''NgramsPatch
532 instance ToSchema NgramsPatch where
533 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
535 instance Arbitrary NgramsPatch where
536 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
538 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
540 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
541 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
543 instance Semigroup NgramsPatch where
544 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
546 instance Monoid NgramsPatch where
547 mempty = _NgramsPatch # mempty
549 instance Validity NgramsPatch where
550 validate p = p ^. _NgramsPatch . to validate
552 instance Transformable NgramsPatch where
553 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
555 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
557 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
559 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
561 type ConflictResolutionNgramsPatch =
562 ( ConflictResolutionPatchMSet NgramsTerm
563 , ConflictResolutionReplace ListType
565 type instance ConflictResolution NgramsPatch =
566 ConflictResolutionNgramsPatch
568 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
569 -- ~ Patched NgramsPatchIso
570 type instance Patched NgramsPatch = PatchedNgramsPatch
572 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
573 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
574 applicable p (Just nre) =
575 applicable (p ^. patch_children) (nre ^. nre_children) <>
576 applicable (p ^. patch_list) (nre ^. nre_list)
578 instance Action NgramsPatch NgramsRepoElement where
579 act p = (nre_children %~ act (p ^. patch_children))
580 . (nre_list %~ act (p ^. patch_list))
582 instance Action NgramsPatch (Maybe NgramsRepoElement) where
585 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
586 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
588 instance FromField NgramsTablePatch
590 fromField = fromField'
592 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
594 fromField = fromField'
596 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
598 type instance ConflictResolution NgramsTablePatch =
599 NgramsTerm -> ConflictResolutionNgramsPatch
601 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
602 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
603 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
605 makePrisms ''NgramsTablePatch
606 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
607 instance ToSchema NgramsTablePatch
609 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
610 applicable p = applicable (p ^. _NgramsTablePatch)
612 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
614 fmap (execState (reParentNgramsTablePatch p)) .
615 act (p ^. _NgramsTablePatch)
617 instance Arbitrary NgramsTablePatch where
618 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
620 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
621 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
622 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
624 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
626 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
627 reRootChildren root ngram = do
628 nre <- use $ at ngram
629 forOf_ (_Just . nre_children . folded) nre $ \child -> do
630 at child . _Just . nre_root ?= root
631 reRootChildren root child
633 reParent :: Maybe RootParent -> ReParent NgramsTerm
634 reParent rp child = do
635 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
636 . (nre_root .~ (_rp_root <$> rp))
638 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
640 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
641 reParentAddRem rp child p =
642 reParent (if isRem p then Nothing else Just rp) child
644 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
645 reParentNgramsPatch parent ngramsPatch = do
646 root_of_parent <- use (at parent . _Just . nre_root)
648 root = fromMaybe parent root_of_parent
649 rp = RootParent { _rp_root = root, _rp_parent = parent }
650 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
651 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
653 reParentNgramsTablePatch :: ReParent NgramsTablePatch
654 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
655 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
657 ------------------------------------------------------------------------
658 ------------------------------------------------------------------------
661 data Versioned a = Versioned
662 { _v_version :: Version
665 deriving (Generic, Show)
666 deriveJSON (unPrefix "_v_") ''Versioned
667 makeLenses ''Versioned
668 instance ToSchema a => ToSchema (Versioned a) where
669 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
670 instance Arbitrary a => Arbitrary (Versioned a) where
671 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
674 -- TODO sequencs of modifications (Patchs)
675 type NgramsIdPatch = Patch NgramsId NgramsPatch
677 ngramsPatch :: Int -> NgramsPatch
678 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
680 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
681 toEdit n p = Edit n p
682 ngramsIdPatch :: Patch NgramsId NgramsPatch
683 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
684 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
685 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
688 -- applyPatchBack :: Patch -> IO Patch
689 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
691 ------------------------------------------------------------------------
692 ------------------------------------------------------------------------
693 ------------------------------------------------------------------------
696 -- TODO: Replace.old is ignored which means that if the current list
697 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
698 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
699 -- However this should not happen in non conflicting situations.
700 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
701 mkListsUpdate nt patches =
702 [ (ngramsTypeId nt, ng, listTypeId lt)
703 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
704 , lt <- patch ^.. patch_list . new
707 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
710 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
711 mkChildrenGroups addOrRem nt patches =
712 [ (ngramsTypeId nt, parent, child)
713 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
714 , child <- patch ^.. patch_children . to addOrRem . folded
718 ngramsTypeFromTabType :: TabType -> NgramsType
719 ngramsTypeFromTabType tabType =
720 let lieu = "Garg.API.Ngrams: " :: Text in
722 Sources -> Ngrams.Sources
723 Authors -> Ngrams.Authors
724 Institutes -> Ngrams.Institutes
725 Terms -> Ngrams.NgramsTerms
726 _ -> panic $ lieu <> "No Ngrams for this tab"
727 -- TODO: This `panic` would disapear with custom NgramsType.
729 ------------------------------------------------------------------------
731 { _r_version :: Version
734 -- first patch in the list is the most recent
738 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
739 parseJSON = genericParseJSON $ unPrefix "_r_"
741 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
742 toJSON = genericToJSON $ unPrefix "_r_"
743 toEncoding = genericToEncoding $ unPrefix "_r_"
747 initRepo :: Monoid s => Repo s p
748 initRepo = Repo 1 mempty []
750 type NgramsRepo = Repo NgramsState NgramsStatePatch
751 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
752 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
754 initMockRepo :: NgramsRepo
755 initMockRepo = Repo 1 s []
757 s = Map.singleton Ngrams.NgramsTerms
758 $ Map.singleton 47254
760 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
762 data RepoEnv = RepoEnv
763 { _renv_var :: !(MVar NgramsRepo)
764 , _renv_saver :: !(IO ())
765 , _renv_lock :: !FileLock
771 class HasRepoVar env where
772 repoVar :: Getter env (MVar NgramsRepo)
774 instance HasRepoVar (MVar NgramsRepo) where
777 class HasRepoSaver env where
778 repoSaver :: Getter env (IO ())
780 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
781 repoEnv :: Getter env RepoEnv
783 instance HasRepo RepoEnv where
786 instance HasRepoVar RepoEnv where
789 instance HasRepoSaver RepoEnv where
790 repoSaver = renv_saver
792 type RepoCmdM env err m =
798 ------------------------------------------------------------------------
800 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
802 saveRepo = liftIO =<< view repoSaver
804 listTypeConflictResolution :: ListType -> ListType -> ListType
805 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
807 ngramsStatePatchConflictResolution
808 :: NgramsType -> NodeId -> NgramsTerm
809 -> ConflictResolutionNgramsPatch
810 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
812 -- undefined {- TODO think this through -}, listTypeConflictResolution)
815 -- Insertions are not considered as patches,
816 -- they do not extend history,
817 -- they do not bump version.
818 insertNewOnly :: a -> Maybe b -> a
819 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
820 -- TODO error handling
822 something :: Monoid a => Maybe a -> a
823 something Nothing = mempty
824 something (Just a) = a
827 -- TODO refactor with putListNgrams
828 copyListNgrams :: RepoCmdM env err m
829 => NodeId -> NodeId -> NgramsType
831 copyListNgrams srcListId dstListId ngramsType = do
833 liftIO $ modifyMVar_ var $
834 pure . (r_state . at ngramsType %~ (Just . f . something))
837 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
838 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
840 -- TODO refactor with putListNgrams
841 -- The list must be non-empty!
842 -- The added ngrams must be non-existent!
843 addListNgrams :: RepoCmdM env err m
844 => NodeId -> NgramsType
845 -> [NgramsElement] -> m ()
846 addListNgrams listId ngramsType nes = do
848 liftIO $ modifyMVar_ var $
849 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
852 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
855 -- If the given list of ngrams elements contains ngrams already in
856 -- the repo, they will be ignored.
857 putListNgrams :: RepoCmdM env err m
858 => NodeId -> NgramsType
859 -> [NgramsElement] -> m ()
860 putListNgrams _ _ [] = pure ()
861 putListNgrams listId ngramsType nes = putListNgrams' listId ngramsType m
863 m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
865 putListNgrams' :: RepoCmdM env err m
866 => ListId -> NgramsType
867 -> Map NgramsTerm NgramsRepoElement
869 putListNgrams' listId ngramsType ns = do
870 -- printDebug "putListNgrams" (length nes)
872 liftIO $ modifyMVar_ var $
889 tableNgramsPost :: RepoCmdM env err m
893 -> [NgramsTerm] -> m ()
894 tableNgramsPost tabType listId mayList =
895 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
897 currentVersion :: RepoCmdM env err m
901 r <- liftIO $ readMVar var
902 pure $ r ^. r_version
904 tableNgramsPull :: RepoCmdM env err m
905 => ListId -> NgramsType
907 -> m (Versioned NgramsTablePatch)
908 tableNgramsPull listId ngramsType p_version = do
910 r <- liftIO $ readMVar var
913 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
914 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
916 pure (Versioned (r ^. r_version) q_table)
918 -- Apply the given patch to the DB and returns the patch to be applied on the
921 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
923 -> Versioned NgramsTablePatch
924 -> m (Versioned NgramsTablePatch)
925 tableNgramsPut tabType listId (Versioned p_version p_table)
926 | p_table == mempty = do
927 let ngramsType = ngramsTypeFromTabType tabType
928 tableNgramsPull listId ngramsType p_version
931 let ngramsType = ngramsTypeFromTabType tabType
932 (p0, p0_validity) = PM.singleton listId p_table
933 (p, p_validity) = PM.singleton ngramsType p0
935 assertValid p0_validity
936 assertValid p_validity
939 vq' <- liftIO $ modifyMVar var $ \r -> do
941 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
942 (p', q') = transformWith ngramsStatePatchConflictResolution p q
943 r' = r & r_version +~ 1
945 & r_history %~ (p' :)
946 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
948 -- Ideally we would like to check these properties. However:
949 -- * They should be checked only to debug the code. The client data
950 -- should be able to trigger these.
951 -- * What kind of error should they throw (we are in IO here)?
952 -- * Should we keep modifyMVar?
953 -- * Should we throw the validation in an Exception, catch it around
954 -- modifyMVar and throw it back as an Error?
955 assertValid $ transformable p q
956 assertValid $ applicable p' (r ^. r_state)
958 pure (r', Versioned (r' ^. r_version) q'_table)
963 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
964 mergeNgramsElement _neOld neNew = neNew
966 { _ne_list :: ListType
967 If we merge the parents/children we can potentially create cycles!
968 , _ne_parent :: Maybe NgramsTerm
969 , _ne_children :: MSet NgramsTerm
973 getNgramsTableMap :: RepoCmdM env err m
976 -> m (Versioned NgramsTableMap)
977 getNgramsTableMap nodeId ngramsType = do
979 repo <- liftIO $ readMVar v
980 pure $ Versioned (repo ^. r_version)
981 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
986 -- | TODO Errors management
987 -- TODO: polymorphic for Annuaire or Corpus or ...
988 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
989 -- TODO: should take only one ListId
991 getTime' :: MonadIO m => m TimeSpec
992 getTime' = liftIO $ getTime ProcessCPUTime
995 getTableNgrams :: forall env err m.
996 (RepoCmdM env err m, HasNodeError err, HasConnection env)
997 => NodeType -> NodeId -> TabType
998 -> ListId -> Limit -> Maybe Offset
1000 -> Maybe MinSize -> Maybe MaxSize
1002 -> (NgramsTerm -> Bool)
1003 -> m (Versioned NgramsTable)
1004 getTableNgrams _nType nId tabType listId limit_ offset
1005 listType minSize maxSize orderBy searchQuery = do
1008 -- lIds <- selectNodesWithUsername NodeList userMaster
1010 ngramsType = ngramsTypeFromTabType tabType
1011 offset' = maybe 0 identity offset
1012 listType' = maybe (const True) (==) listType
1013 minSize' = maybe (const True) (<=) minSize
1014 maxSize' = maybe (const True) (>=) maxSize
1016 selected_node n = minSize' s
1018 && searchQuery (n ^. ne_ngrams)
1019 && listType' (n ^. ne_list)
1023 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
1025 ---------------------------------------
1026 sortOnOrder Nothing = identity
1027 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
1028 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
1029 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
1030 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
1032 ---------------------------------------
1033 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
1034 selectAndPaginate tableMap = roots <> inners
1036 list = tableMap ^.. each
1037 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1039 selected_nodes = list & take limit_
1041 . filter selected_node
1042 . sortOnOrder orderBy
1043 roots = rootOf <$> selected_nodes
1044 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1045 inners = list & filter (selected_inner rootsSet)
1047 ---------------------------------------
1048 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1049 setScores False table = pure table
1050 setScores True table = do
1051 let ngrams_terms = (table ^.. each . ne_ngrams)
1053 occurrences <- getOccByNgramsOnlyFast' nId
1058 liftIO $ hprint stderr
1059 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1060 (length ngrams_terms) t1 t2
1062 occurrences <- getOccByNgramsOnlySlow nType nId
1068 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1070 pure $ table & each %~ setOcc
1071 ---------------------------------------
1073 -- lists <- catMaybes <$> listsWith userMaster
1074 -- trace (show lists) $
1075 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1077 let scoresNeeded = needsScores orderBy
1078 tableMap1 <- getNgramsTableMap listId ngramsType
1080 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1081 . Map.mapWithKey ngramsElementFromRepo
1083 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1084 . setScores (not scoresNeeded)
1087 liftIO $ hprint stderr
1088 ("getTableNgrams total=" % timeSpecs
1089 % " map1=" % timeSpecs
1090 % " map2=" % timeSpecs
1091 % " map3=" % timeSpecs
1092 % " sql=" % (if scoresNeeded then "map2" else "map3")
1094 ) t0 t3 t0 t1 t1 t2 t2 t3
1100 -- TODO: find a better place for the code above, All APIs stay here
1101 type QueryParamR = QueryParam' '[Required, Strict]
1104 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1105 deriving (Generic, Enum, Bounded, Read, Show)
1107 instance FromHttpApiData OrderBy
1109 parseUrlPiece "TermAsc" = pure TermAsc
1110 parseUrlPiece "TermDesc" = pure TermDesc
1111 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1112 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1113 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1115 instance ToParamSchema OrderBy
1116 instance FromJSON OrderBy
1117 instance ToJSON OrderBy
1118 instance ToSchema OrderBy
1119 instance Arbitrary OrderBy
1121 arbitrary = elements [minBound..maxBound]
1123 needsScores :: Maybe OrderBy -> Bool
1124 needsScores (Just ScoreAsc) = True
1125 needsScores (Just ScoreDesc) = True
1126 needsScores _ = False
1128 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1129 :> QueryParamR "ngramsType" TabType
1130 :> QueryParamR "list" ListId
1131 :> QueryParamR "limit" Limit
1132 :> QueryParam "offset" Offset
1133 :> QueryParam "listType" ListType
1134 :> QueryParam "minTermSize" MinSize
1135 :> QueryParam "maxTermSize" MaxSize
1136 :> QueryParam "orderBy" OrderBy
1137 :> QueryParam "search" Text
1138 :> Get '[JSON] (Versioned NgramsTable)
1140 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1141 :> QueryParamR "ngramsType" TabType
1142 :> QueryParamR "list" ListId
1143 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1144 :> Put '[JSON] (Versioned NgramsTablePatch)
1146 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1147 :> QueryParamR "ngramsType" TabType
1148 :> QueryParamR "list" ListId
1149 :> QueryParam "listType" ListType
1150 :> ReqBody '[JSON] [NgramsTerm]
1153 type TableNgramsApi = TableNgramsApiGet
1154 :<|> TableNgramsApiPut
1155 :<|> TableNgramsApiPost
1157 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1158 => NodeId -> TabType
1159 -> ListId -> Limit -> Maybe Offset
1161 -> Maybe MinSize -> Maybe MaxSize
1163 -> Maybe Text -- full text search
1164 -> m (Versioned NgramsTable)
1165 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1166 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1168 searchQuery = maybe (const True) isInfixOf mt
1170 -- | Text search is deactivated for now for ngrams by doc only
1171 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1173 -> ListId -> Limit -> Maybe Offset
1175 -> Maybe MinSize -> Maybe MaxSize
1177 -> Maybe Text -- full text search
1178 -> m (Versioned NgramsTable)
1179 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1180 ns <- selectNodesWithUsername NodeList userMaster
1181 let ngramsType = ngramsTypeFromTabType tabType
1182 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1183 let searchQuery = flip S.member (S.fromList ngs)
1184 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1188 apiNgramsTableCorpus :: ( RepoCmdM env err m
1190 , HasInvalidError err
1193 => NodeId -> ServerT TableNgramsApi m
1194 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1196 :<|> tableNgramsPost
1199 apiNgramsTableDoc :: ( RepoCmdM env err m
1201 , HasInvalidError err
1204 => DocId -> ServerT TableNgramsApi m
1205 apiNgramsTableDoc dId = getTableNgramsDoc dId
1207 :<|> tableNgramsPost
1208 -- > add new ngrams in database (TODO AD)
1209 -- > index all the corpus accordingly (TODO AD)
1211 listNgramsChangedSince :: RepoCmdM env err m
1212 => ListId -> NgramsType -> Version -> m (Versioned Bool)
1213 listNgramsChangedSince listId ngramsType version
1215 Versioned <$> currentVersion <*> pure True
1217 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
1220 instance Arbitrary NgramsRepoElement where
1221 arbitrary = elements $ map ngramsElementToRepo ns
1223 NgramsTable ns = mockTable