1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
44 , apiNgramsTableCorpus
64 , NgramsRepoElement(..)
73 , ngramsTypeFromTabType
84 -- import Debug.Trace (trace)
85 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
86 -- import Gargantext.Database.Schema.User (UserId)
87 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
88 Composable(..), Transformable(..),
89 PairPatch(..), Patched, ConflictResolution,
90 ConflictResolutionReplace, ours)
91 import qualified Data.Map.Strict.Patch as PM
93 import Data.Ord (Down(..))
95 --import Data.Semigroup
97 import qualified Data.Set as S
98 import qualified Data.List as List
99 import Data.Maybe (fromMaybe)
100 -- import Data.Tuple.Extra (first)
101 import qualified Data.Map.Strict as Map
102 import Data.Map.Strict (Map)
103 import qualified Data.Set as Set
104 import Control.Category ((>>>))
105 import Control.Concurrent
106 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~))
107 import Control.Monad.Error.Class (MonadError)
108 import Control.Monad.Reader
109 import Control.Monad.State
110 import Data.Aeson hiding ((.=))
111 import Data.Aeson.TH (deriveJSON)
112 import Data.Either(Either(Left))
113 -- import Data.Map (lookup)
114 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
115 import Data.Swagger hiding (version, patch)
116 import Data.Text (Text, isInfixOf, count)
118 import Formatting (hprint, int, (%))
119 import Formatting.Clock (timeSpecs)
120 import GHC.Generics (Generic)
121 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
122 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
123 import Gargantext.Database.Config (userMaster)
124 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
125 import Gargantext.Database.Schema.Ngrams (NgramsType)
126 import Gargantext.Database.Types.Node (NodeType(..))
127 import Gargantext.Database.Utils (fromField', HasConnection)
128 import Gargantext.Database.Node.Select
129 import Gargantext.Database.Ngrams
130 --import Gargantext.Database.Lists (listsWith)
131 import Gargantext.Database.Schema.Node (HasNodeError)
132 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
133 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
134 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
135 import Gargantext.Prelude
136 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
137 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
138 import Servant hiding (Patch)
139 import System.Clock (getTime, TimeSpec, Clock(..))
140 import System.FileLock (FileLock)
141 import System.IO (stderr)
142 import Test.QuickCheck (elements)
143 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
148 instance ToSchema TODO where
149 instance ToParamSchema TODO where
151 ------------------------------------------------------------------------
152 --data FacetFormat = Table | Chart
153 data TabType = Docs | Trash | MoreFav | MoreTrash
154 | Terms | Sources | Authors | Institutes
156 deriving (Generic, Enum, Bounded, Show)
158 instance FromHttpApiData TabType
160 parseUrlPiece "Docs" = pure Docs
161 parseUrlPiece "Trash" = pure Trash
162 parseUrlPiece "MoreFav" = pure MoreFav
163 parseUrlPiece "MoreTrash" = pure MoreTrash
165 parseUrlPiece "Terms" = pure Terms
166 parseUrlPiece "Sources" = pure Sources
167 parseUrlPiece "Institutes" = pure Institutes
168 parseUrlPiece "Authors" = pure Authors
170 parseUrlPiece "Contacts" = pure Contacts
172 parseUrlPiece _ = Left "Unexpected value of TabType"
174 instance ToParamSchema TabType
175 instance ToJSON TabType
176 instance FromJSON TabType
177 instance ToSchema TabType
178 instance Arbitrary TabType
180 arbitrary = elements [minBound .. maxBound]
182 newtype MSet a = MSet (Map a ())
183 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
185 instance ToJSON a => ToJSON (MSet a) where
186 toJSON (MSet m) = toJSON (Map.keys m)
187 toEncoding (MSet m) = toEncoding (Map.keys m)
189 mSetFromSet :: Set a -> MSet a
190 mSetFromSet = MSet . Map.fromSet (const ())
192 mSetFromList :: Ord a => [a] -> MSet a
193 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
195 -- mSetToSet :: Ord a => MSet a -> Set a
196 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
197 mSetToSet :: Ord a => MSet a -> Set a
198 mSetToSet = Set.fromList . mSetToList
200 mSetToList :: MSet a -> [a]
201 mSetToList (MSet a) = Map.keys a
203 instance Foldable MSet where
204 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
206 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
207 parseJSON = fmap mSetFromList . parseJSON
209 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
211 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
213 ------------------------------------------------------------------------
214 type NgramsTerm = Text
216 data RootParent = RootParent
217 { _rp_root :: NgramsTerm
218 , _rp_parent :: NgramsTerm
220 deriving (Ord, Eq, Show, Generic)
222 deriveJSON (unPrefix "_rp_") ''RootParent
223 makeLenses ''RootParent
225 data NgramsRepoElement = NgramsRepoElement
227 , _nre_list :: ListType
228 --, _nre_root_parent :: Maybe RootParent
229 , _nre_root :: Maybe NgramsTerm
230 , _nre_parent :: Maybe NgramsTerm
231 , _nre_children :: MSet NgramsTerm
233 deriving (Ord, Eq, Show, Generic)
235 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
236 makeLenses ''NgramsRepoElement
239 NgramsElement { _ne_ngrams :: NgramsTerm
241 , _ne_list :: ListType
242 , _ne_occurrences :: Int
243 , _ne_root :: Maybe NgramsTerm
244 , _ne_parent :: Maybe NgramsTerm
245 , _ne_children :: MSet NgramsTerm
247 deriving (Ord, Eq, Show, Generic)
249 deriveJSON (unPrefix "_ne_") ''NgramsElement
250 makeLenses ''NgramsElement
252 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
253 mkNgramsElement ngrams list rp children =
254 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
257 size = 1 + count " " ngrams
259 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
260 newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
262 instance ToSchema NgramsElement where
263 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
264 instance Arbitrary NgramsElement where
265 arbitrary = elements [newNgramsElement Nothing "sport"]
267 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
269 (NgramsElement { _ne_size = s
283 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
284 ngramsElementFromRepo
293 NgramsElement { _ne_size = s
298 , _ne_ngrams = ngrams
299 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
301 -- Here we could use 0 if we want to avoid any `panic`.
302 -- It will not happen using getTableNgrams if
303 -- getOccByNgramsOnly provides a count of occurrences for
304 -- all the ngrams given.
308 ------------------------------------------------------------------------
309 newtype NgramsTable = NgramsTable [NgramsElement]
310 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
312 type ListNgrams = NgramsTable
314 makePrisms ''NgramsTable
316 -- | Question: why these repetition of Type in this instance
317 -- may you document it please ?
318 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
319 each = _NgramsTable . each
322 -- | TODO Check N and Weight
324 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
325 toNgramsElement ns = map toNgramsElement' ns
327 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
331 Just x -> lookup x mapParent
332 c' = maybe mempty identity $ lookup t mapChildren
333 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
335 mapParent :: Map Int Text
336 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
338 mapChildren :: Map Text (Set Text)
339 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
340 $ Map.fromListWith (<>)
341 $ map (first fromJust)
342 $ filter (isJust . fst)
343 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
346 mockTable :: NgramsTable
347 mockTable = NgramsTable
348 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
349 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
350 , mkNgramsElement "cats" StopTerm Nothing mempty
351 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
352 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
353 , mkNgramsElement "fox" GraphTerm Nothing mempty
354 , mkNgramsElement "object" CandidateTerm Nothing mempty
355 , mkNgramsElement "nothing" StopTerm Nothing mempty
356 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
357 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
358 , mkNgramsElement "moon" CandidateTerm Nothing mempty
359 , mkNgramsElement "sky" StopTerm Nothing mempty
362 rp n = Just $ RootParent n n
364 instance Arbitrary NgramsTable where
365 arbitrary = pure mockTable
367 instance ToSchema NgramsTable
369 ------------------------------------------------------------------------
370 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
372 ------------------------------------------------------------------------
373 -- On the Client side:
374 --data Action = InGroup NgramsId NgramsId
375 -- | OutGroup NgramsId NgramsId
376 -- | SetListType NgramsId ListType
378 data PatchSet a = PatchSet
382 deriving (Eq, Ord, Show, Generic)
384 makeLenses ''PatchSet
385 makePrisms ''PatchSet
387 instance ToJSON a => ToJSON (PatchSet a) where
388 toJSON = genericToJSON $ unPrefix "_"
389 toEncoding = genericToEncoding $ unPrefix "_"
391 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
392 parseJSON = genericParseJSON $ unPrefix "_"
395 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
396 arbitrary = PatchSet <$> arbitrary <*> arbitrary
398 type instance Patched (PatchSet a) = Set a
400 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
401 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
403 instance Ord a => Semigroup (PatchSet a) where
404 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
405 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
408 instance Ord a => Monoid (PatchSet a) where
409 mempty = PatchSet mempty mempty
411 instance Ord a => Group (PatchSet a) where
412 invert (PatchSet r a) = PatchSet a r
414 instance Ord a => Composable (PatchSet a) where
415 composable _ _ = undefined
417 instance Ord a => Action (PatchSet a) (Set a) where
418 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
420 instance Applicable (PatchSet a) (Set a) where
421 applicable _ _ = mempty
423 instance Ord a => Validity (PatchSet a) where
424 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
426 instance Ord a => Transformable (PatchSet a) where
427 transformable = undefined
429 conflicts _p _q = undefined
431 transformWith conflict p q = undefined conflict p q
433 instance ToSchema a => ToSchema (PatchSet a)
436 type AddRem = Replace (Maybe ())
438 remPatch, addPatch :: AddRem
439 remPatch = replace (Just ()) Nothing
440 addPatch = replace Nothing (Just ())
442 isRem :: Replace (Maybe ()) -> Bool
443 isRem = (== remPatch)
445 type PatchMap = PM.PatchMap
447 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
448 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
449 Transformable, Composable)
451 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
452 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
454 -- TODO this breaks module abstraction
455 makePrisms ''PM.PatchMap
457 makePrisms ''PatchMSet
459 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
460 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
462 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
463 f = Map.partition isRem >>> both %~ Map.keysSet
465 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
466 g (rems, adds) = Map.fromSet (const remPatch) rems
467 <> Map.fromSet (const addPatch) adds
469 instance Ord a => Action (PatchMSet a) (MSet a) where
470 act (PatchMSet p) (MSet m) = MSet $ act p m
472 instance Ord a => Applicable (PatchMSet a) (MSet a) where
473 applicable (PatchMSet p) (MSet m) = applicable p m
475 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
476 toJSON = toJSON . view _PatchMSetIso
477 toEncoding = toEncoding . view _PatchMSetIso
479 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
480 parseJSON = fmap (_PatchMSetIso #) . parseJSON
482 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
483 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
485 instance ToSchema a => ToSchema (PatchMSet a) where
487 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
489 type instance Patched (PatchMSet a) = MSet a
491 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
492 arbitrary = uncurry replace <$> arbitrary
493 -- If they happen to be equal then the patch is Keep.
495 instance ToSchema a => ToSchema (Replace a) where
496 declareNamedSchema (_ :: Proxy (Replace a)) = do
497 -- TODO Keep constructor is not supported here.
498 aSchema <- declareSchemaRef (Proxy :: Proxy a)
499 return $ NamedSchema (Just "Replace") $ mempty
500 & type_ ?~ SwaggerObject
502 InsOrdHashMap.fromList
506 & required .~ [ "old", "new" ]
509 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
510 , _patch_list :: Replace ListType -- TODO Map UserId ListType
512 deriving (Eq, Show, Generic)
514 deriveJSON (unPrefix "_") ''NgramsPatch
515 makeLenses ''NgramsPatch
517 instance ToSchema NgramsPatch where
518 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
520 instance Arbitrary NgramsPatch where
521 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
523 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
525 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
526 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
528 instance Semigroup NgramsPatch where
529 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
531 instance Monoid NgramsPatch where
532 mempty = _NgramsPatch # mempty
534 instance Validity NgramsPatch where
535 validate p = p ^. _NgramsPatch . to validate
537 instance Transformable NgramsPatch where
538 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
540 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
542 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
544 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
546 type ConflictResolutionNgramsPatch =
547 ( ConflictResolutionPatchMSet NgramsTerm
548 , ConflictResolutionReplace ListType
550 type instance ConflictResolution NgramsPatch =
551 ConflictResolutionNgramsPatch
553 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
554 -- ~ Patched NgramsPatchIso
555 type instance Patched NgramsPatch = PatchedNgramsPatch
557 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
558 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
559 applicable p (Just nre) =
560 applicable (p ^. patch_children) (nre ^. nre_children) <>
561 applicable (p ^. patch_list) (nre ^. nre_list)
563 instance Action NgramsPatch NgramsRepoElement where
564 act p = (nre_children %~ act (p ^. patch_children))
565 . (nre_list %~ act (p ^. patch_list))
567 instance Action NgramsPatch (Maybe NgramsRepoElement) where
570 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
571 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
573 instance FromField NgramsTablePatch
575 fromField = fromField'
577 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
579 fromField = fromField'
581 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
583 type instance ConflictResolution NgramsTablePatch =
584 NgramsTerm -> ConflictResolutionNgramsPatch
586 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
587 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
588 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
590 makePrisms ''NgramsTablePatch
591 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
592 instance ToSchema NgramsTablePatch
594 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
595 applicable p = applicable (p ^. _NgramsTablePatch)
597 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
599 fmap (execState (reParentNgramsTablePatch p)) .
600 act (p ^. _NgramsTablePatch)
602 instance Arbitrary NgramsTablePatch where
603 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
605 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
606 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
607 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
609 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
611 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
612 reRootChildren root ngram = do
613 nre <- use $ at ngram
614 forOf_ (_Just . nre_children . folded) nre $ \child -> do
615 at child . _Just . nre_root ?= root
616 reRootChildren root child
618 reParent :: Maybe RootParent -> ReParent NgramsTerm
619 reParent rp child = do
620 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
621 . (nre_root .~ (_rp_root <$> rp))
623 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
625 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
626 reParentAddRem rp child p =
627 reParent (if isRem p then Nothing else Just rp) child
629 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
630 reParentNgramsPatch parent ngramsPatch = do
631 root_of_parent <- use (at parent . _Just . nre_root)
633 root = fromMaybe parent root_of_parent
634 rp = RootParent { _rp_root = root, _rp_parent = parent }
635 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
636 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
638 reParentNgramsTablePatch :: ReParent NgramsTablePatch
639 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
640 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
642 ------------------------------------------------------------------------
643 ------------------------------------------------------------------------
646 data Versioned a = Versioned
647 { _v_version :: Version
650 deriving (Generic, Show)
651 deriveJSON (unPrefix "_v_") ''Versioned
652 makeLenses ''Versioned
653 instance ToSchema a => ToSchema (Versioned a) where
654 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
655 instance Arbitrary a => Arbitrary (Versioned a) where
656 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
659 -- TODO sequencs of modifications (Patchs)
660 type NgramsIdPatch = Patch NgramsId NgramsPatch
662 ngramsPatch :: Int -> NgramsPatch
663 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
665 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
666 toEdit n p = Edit n p
667 ngramsIdPatch :: Patch NgramsId NgramsPatch
668 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
669 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
670 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
673 -- applyPatchBack :: Patch -> IO Patch
674 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
676 ------------------------------------------------------------------------
677 ------------------------------------------------------------------------
678 ------------------------------------------------------------------------
681 -- TODO: Replace.old is ignored which means that if the current list
682 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
683 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
684 -- However this should not happen in non conflicting situations.
685 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
686 mkListsUpdate nt patches =
687 [ (ngramsTypeId nt, ng, listTypeId lt)
688 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
689 , lt <- patch ^.. patch_list . new
692 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
695 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
696 mkChildrenGroups addOrRem nt patches =
697 [ (ngramsTypeId nt, parent, child)
698 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
699 , child <- patch ^.. patch_children . to addOrRem . folded
703 ngramsTypeFromTabType :: TabType -> NgramsType
704 ngramsTypeFromTabType tabType =
705 let lieu = "Garg.API.Ngrams: " :: Text in
707 Sources -> Ngrams.Sources
708 Authors -> Ngrams.Authors
709 Institutes -> Ngrams.Institutes
710 Terms -> Ngrams.NgramsTerms
711 _ -> panic $ lieu <> "No Ngrams for this tab"
712 -- TODO: This `panic` would disapear with custom NgramsType.
714 ------------------------------------------------------------------------
716 { _r_version :: Version
719 -- first patch in the list is the most recent
723 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
724 parseJSON = genericParseJSON $ unPrefix "_r_"
726 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
727 toJSON = genericToJSON $ unPrefix "_r_"
728 toEncoding = genericToEncoding $ unPrefix "_r_"
732 initRepo :: Monoid s => Repo s p
733 initRepo = Repo 1 mempty []
735 type NgramsRepo = Repo NgramsState NgramsStatePatch
736 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
737 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
739 initMockRepo :: NgramsRepo
740 initMockRepo = Repo 1 s []
742 s = Map.singleton Ngrams.NgramsTerms
743 $ Map.singleton 47254
745 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
747 data RepoEnv = RepoEnv
748 { _renv_var :: !(MVar NgramsRepo)
749 , _renv_saver :: !(IO ())
750 , _renv_lock :: !FileLock
756 class HasRepoVar env where
757 repoVar :: Getter env (MVar NgramsRepo)
759 instance HasRepoVar (MVar NgramsRepo) where
762 class HasRepoSaver env where
763 repoSaver :: Getter env (IO ())
765 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
766 repoEnv :: Getter env RepoEnv
768 instance HasRepo RepoEnv where
771 instance HasRepoVar RepoEnv where
774 instance HasRepoSaver RepoEnv where
775 repoSaver = renv_saver
777 type RepoCmdM env err m =
783 ------------------------------------------------------------------------
785 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
787 saveRepo = liftIO =<< view repoSaver
789 listTypeConflictResolution :: ListType -> ListType -> ListType
790 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
792 ngramsStatePatchConflictResolution
793 :: NgramsType -> NodeId -> NgramsTerm
794 -> ConflictResolutionNgramsPatch
795 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
797 -- undefined {- TODO think this through -}, listTypeConflictResolution)
800 -- Insertions are not considered as patches,
801 -- they do not extend history,
802 -- they do not bump version.
803 insertNewOnly :: a -> Maybe b -> a
804 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
805 -- TODO error handling
807 something :: Monoid a => Maybe a -> a
808 something Nothing = mempty
809 something (Just a) = a
812 -- TODO refactor with putListNgrams
813 copyListNgrams :: RepoCmdM env err m
814 => NodeId -> NodeId -> NgramsType
816 copyListNgrams srcListId dstListId ngramsType = do
818 liftIO $ modifyMVar_ var $
819 pure . (r_state . at ngramsType %~ (Just . f . something))
822 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
823 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
825 -- TODO refactor with putListNgrams
826 -- The list must be non-empty!
827 -- The added ngrams must be non-existent!
828 addListNgrams :: RepoCmdM env err m
829 => NodeId -> NgramsType
830 -> [NgramsElement] -> m ()
831 addListNgrams listId ngramsType nes = do
833 liftIO $ modifyMVar_ var $
834 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
837 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
840 -- If the given list of ngrams elements contains ngrams already in
841 -- the repo, they will be ignored.
842 putListNgrams :: RepoCmdM env err m
843 => NodeId -> NgramsType
844 -> [NgramsElement] -> m ()
845 putListNgrams _ _ [] = pure ()
846 putListNgrams listId ngramsType nes = do
847 -- printDebug "putListNgrams" (length nes)
849 liftIO $ modifyMVar_ var $
850 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (<> m) . something)) . something))
853 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
856 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
857 tableNgramsPost tabType listId mayList =
858 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
860 -- Apply the given patch to the DB and returns the patch to be applied on the
863 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
865 -> Versioned NgramsTablePatch
866 -> m (Versioned NgramsTablePatch)
867 tableNgramsPut tabType listId (Versioned p_version p_table)
868 | p_table == mempty = do
869 let ngramsType = ngramsTypeFromTabType tabType
872 r <- liftIO $ readMVar var
875 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
876 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
878 pure (Versioned (r ^. r_version) q_table)
881 let ngramsType = ngramsTypeFromTabType tabType
882 (p0, p0_validity) = PM.singleton listId p_table
883 (p, p_validity) = PM.singleton ngramsType p0
885 assertValid p0_validity
886 assertValid p_validity
889 vq' <- liftIO $ modifyMVar var $ \r -> do
891 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
892 (p', q') = transformWith ngramsStatePatchConflictResolution p q
893 r' = r & r_version +~ 1
895 & r_history %~ (p' :)
896 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
898 -- Ideally we would like to check these properties. However:
899 -- * They should be checked only to debug the code. The client data
900 -- should be able to trigger these.
901 -- * What kind of error should they throw (we are in IO here)?
902 -- * Should we keep modifyMVar?
903 -- * Should we throw the validation in an Exception, catch it around
904 -- modifyMVar and throw it back as an Error?
905 assertValid $ transformable p q
906 assertValid $ applicable p' (r ^. r_state)
908 pure (r', Versioned (r' ^. r_version) q'_table)
913 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
914 mergeNgramsElement _neOld neNew = neNew
916 { _ne_list :: ListType
917 If we merge the parents/children we can potentially create cycles!
918 , _ne_parent :: Maybe NgramsTerm
919 , _ne_children :: MSet NgramsTerm
923 getNgramsTableMap :: RepoCmdM env err m
924 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
925 getNgramsTableMap nodeId ngramsType = do
927 repo <- liftIO $ readMVar v
928 pure $ Versioned (repo ^. r_version)
929 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
934 -- | TODO Errors management
935 -- TODO: polymorphic for Annuaire or Corpus or ...
936 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
937 -- TODO: should take only one ListId
939 getTime' :: MonadIO m => m TimeSpec
940 getTime' = liftIO $ getTime ProcessCPUTime
943 getTableNgrams :: forall env err m.
944 (RepoCmdM env err m, HasNodeError err, HasConnection env)
945 => NodeType -> NodeId -> TabType
946 -> ListId -> Limit -> Maybe Offset
948 -> Maybe MinSize -> Maybe MaxSize
950 -> (NgramsTerm -> Bool)
951 -> m (Versioned NgramsTable)
952 getTableNgrams _nType nId tabType listId limit_ offset
953 listType minSize maxSize orderBy searchQuery = do
956 -- lIds <- selectNodesWithUsername NodeList userMaster
958 ngramsType = ngramsTypeFromTabType tabType
959 offset' = maybe 0 identity offset
960 listType' = maybe (const True) (==) listType
961 minSize' = maybe (const True) (<=) minSize
962 maxSize' = maybe (const True) (>=) maxSize
964 selected_node n = minSize' s
966 && searchQuery (n ^. ne_ngrams)
967 && listType' (n ^. ne_list)
971 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
973 ---------------------------------------
974 sortOnOrder Nothing = identity
975 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
976 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
977 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
978 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
980 ---------------------------------------
981 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
982 selectAndPaginate tableMap = roots <> inners
984 list = tableMap ^.. each
985 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
987 selected_nodes = list & take limit_
989 . filter selected_node
990 . sortOnOrder orderBy
991 roots = rootOf <$> selected_nodes
992 rootsSet = Set.fromList (_ne_ngrams <$> roots)
993 inners = list & filter (selected_inner rootsSet)
995 ---------------------------------------
996 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
997 setScores False table = pure table
998 setScores True table = do
999 let ngrams_terms = (table ^.. each . ne_ngrams)
1001 occurrences <- getOccByNgramsOnlyFast nId
1005 liftIO $ hprint stderr
1006 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1007 (length ngrams_terms) t1 t2
1009 occurrences <- getOccByNgramsOnlySlow nType nId
1015 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1017 pure $ table & each %~ setOcc
1018 ---------------------------------------
1020 -- lists <- catMaybes <$> listsWith userMaster
1021 -- trace (show lists) $
1022 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1024 let nSco = needsScores orderBy
1025 tableMap1 <- getNgramsTableMap listId ngramsType
1027 tableMap2 <- tableMap1 & v_data %%~ setScores nSco
1028 . Map.mapWithKey ngramsElementFromRepo
1030 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1031 . setScores (not nSco)
1034 liftIO $ hprint stderr
1035 ("getTableNgrams total=" % timeSpecs
1036 % " map1=" % timeSpecs
1037 % " map2=" % timeSpecs
1038 % " map3=" % timeSpecs
1039 % " sql=" % if nSco then "map2" else "map3"
1041 ) t0 t3 t0 t1 t1 t2 t2 t3
1047 -- TODO: find a better place for the code above, All APIs stay here
1048 type QueryParamR = QueryParam' '[Required, Strict]
1051 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1052 deriving (Generic, Enum, Bounded, Read, Show)
1054 instance FromHttpApiData OrderBy
1056 parseUrlPiece "TermAsc" = pure TermAsc
1057 parseUrlPiece "TermDesc" = pure TermDesc
1058 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1059 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1060 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1062 instance ToParamSchema OrderBy
1063 instance FromJSON OrderBy
1064 instance ToJSON OrderBy
1065 instance ToSchema OrderBy
1066 instance Arbitrary OrderBy
1068 arbitrary = elements [minBound..maxBound]
1070 needsScores :: Maybe OrderBy -> Bool
1071 needsScores (Just ScoreAsc) = True
1072 needsScores (Just ScoreDesc) = True
1073 needsScores _ = False
1075 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1076 :> QueryParamR "ngramsType" TabType
1077 :> QueryParamR "list" ListId
1078 :> QueryParamR "limit" Limit
1079 :> QueryParam "offset" Offset
1080 :> QueryParam "listType" ListType
1081 :> QueryParam "minTermSize" MinSize
1082 :> QueryParam "maxTermSize" MaxSize
1083 :> QueryParam "orderBy" OrderBy
1084 :> QueryParam "search" Text
1085 :> Get '[JSON] (Versioned NgramsTable)
1087 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1088 :> QueryParamR "ngramsType" TabType
1089 :> QueryParamR "list" ListId
1090 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1091 :> Put '[JSON] (Versioned NgramsTablePatch)
1093 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1094 :> QueryParamR "ngramsType" TabType
1095 :> QueryParamR "list" ListId
1096 :> QueryParam "listType" ListType
1097 :> ReqBody '[JSON] [NgramsTerm]
1100 type TableNgramsApi = TableNgramsApiGet
1101 :<|> TableNgramsApiPut
1102 :<|> TableNgramsApiPost
1104 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1105 => NodeId -> TabType
1106 -> ListId -> Limit -> Maybe Offset
1108 -> Maybe MinSize -> Maybe MaxSize
1110 -> Maybe Text -- full text search
1111 -> m (Versioned NgramsTable)
1112 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1113 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1115 searchQuery = maybe (const True) isInfixOf mt
1117 -- | Text search is deactivated for now for ngrams by doc only
1118 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1120 -> ListId -> Limit -> Maybe Offset
1122 -> Maybe MinSize -> Maybe MaxSize
1124 -> Maybe Text -- full text search
1125 -> m (Versioned NgramsTable)
1126 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1127 ns <- selectNodesWithUsername NodeList userMaster
1128 let ngramsType = ngramsTypeFromTabType tabType
1129 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1130 let searchQuery = flip S.member (S.fromList ngs)
1131 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1137 apiNgramsTableCorpus :: ( RepoCmdM env err m
1139 , HasInvalidError err
1142 => NodeId -> ServerT TableNgramsApi m
1143 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1145 :<|> tableNgramsPost
1148 apiNgramsTableDoc :: ( RepoCmdM env err m
1150 , HasInvalidError err
1153 => DocId -> ServerT TableNgramsApi m
1154 apiNgramsTableDoc dId = getTableNgramsDoc dId
1156 :<|> tableNgramsPost
1157 -- > add new ngrams in database (TODO AD)
1158 -- > index all the corpus accordingly (TODO AD)