1 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
3 Module : Gargantext.API.Ngrams
4 Description : Server API
5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 get ngrams filtered by NgramsType
19 {-# LANGUAGE ConstraintKinds #-}
20 {-# LANGUAGE DataKinds #-}
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
24 {-# LANGUAGE ScopedTypeVariables #-}
25 {-# LANGUAGE TemplateHaskell #-}
26 {-# LANGUAGE TypeOperators #-}
27 {-# LANGUAGE FlexibleContexts #-}
28 {-# LANGUAGE FlexibleInstances #-}
29 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
30 {-# LANGUAGE MultiParamTypeClasses #-}
31 {-# LANGUAGE RankNTypes #-}
32 {-# LANGUAGE TypeFamilies #-}
33 {-# OPTIONS -fno-warn-orphans #-}
35 module Gargantext.API.Ngrams
44 , apiNgramsTableCorpus
65 , NgramsRepoElement(..)
74 , ngramsTypeFromTabType
90 , listNgramsChangedSince
94 -- import Debug.Trace (trace)
95 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
96 -- import Gargantext.Database.Schema.User (UserId)
97 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
98 Composable(..), Transformable(..),
99 PairPatch(..), Patched, ConflictResolution,
100 ConflictResolutionReplace, ours)
101 import qualified Data.Map.Strict.Patch as PM
103 import Data.Ord (Down(..))
105 --import Data.Semigroup
106 import Data.Set (Set)
107 import qualified Data.Set as S
108 import qualified Data.List as List
109 import Data.Maybe (fromMaybe)
110 -- import Data.Tuple.Extra (first)
111 import qualified Data.Map.Strict as Map
112 import Data.Map.Strict (Map)
113 import qualified Data.Set as Set
114 import Control.Category ((>>>))
115 import Control.Concurrent
116 import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~), mapped)
117 import Control.Monad.Error.Class (MonadError)
118 import Control.Monad.Reader
119 import Control.Monad.State
120 import Data.Aeson hiding ((.=))
121 import Data.Aeson.TH (deriveJSON)
122 import Data.Either(Either(Left))
123 -- import Data.Map (lookup)
124 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
125 import Data.Swagger hiding (version, patch)
126 import Data.Text (Text, isInfixOf, count)
128 import Formatting (hprint, int, (%))
129 import Formatting.Clock (timeSpecs)
130 import GHC.Generics (Generic)
131 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
132 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
133 import Gargantext.Database.Config (userMaster)
134 import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
135 import Gargantext.Database.Schema.Ngrams (NgramsType)
136 import Gargantext.Database.Types.Node (NodeType(..))
137 import Gargantext.Database.Utils (fromField', HasConnection)
138 import Gargantext.Database.Node.Select
139 import Gargantext.Database.Ngrams
140 --import Gargantext.Database.Lists (listsWith)
141 import Gargantext.Database.Schema.Node (HasNodeError)
142 import Database.PostgreSQL.Simple.FromField (FromField, fromField)
143 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
144 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
145 import Gargantext.Prelude
146 import Gargantext.Core.Types (TODO)
147 import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
148 import Servant hiding (Patch)
149 import System.Clock (getTime, TimeSpec, Clock(..))
150 import System.FileLock (FileLock)
151 import System.IO (stderr)
152 import Test.QuickCheck (elements)
153 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
155 ------------------------------------------------------------------------
156 --data FacetFormat = Table | Chart
157 data TabType = Docs | Trash | MoreFav | MoreTrash
158 | Terms | Sources | Authors | Institutes
160 deriving (Generic, Enum, Bounded, Show)
162 instance FromHttpApiData TabType
164 parseUrlPiece "Docs" = pure Docs
165 parseUrlPiece "Trash" = pure Trash
166 parseUrlPiece "MoreFav" = pure MoreFav
167 parseUrlPiece "MoreTrash" = pure MoreTrash
169 parseUrlPiece "Terms" = pure Terms
170 parseUrlPiece "Sources" = pure Sources
171 parseUrlPiece "Institutes" = pure Institutes
172 parseUrlPiece "Authors" = pure Authors
174 parseUrlPiece "Contacts" = pure Contacts
176 parseUrlPiece _ = Left "Unexpected value of TabType"
178 instance ToParamSchema TabType
179 instance ToJSON TabType
180 instance FromJSON TabType
181 instance ToSchema TabType
182 instance Arbitrary TabType
184 arbitrary = elements [minBound .. maxBound]
186 newtype MSet a = MSet (Map a ())
187 deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
189 instance ToJSON a => ToJSON (MSet a) where
190 toJSON (MSet m) = toJSON (Map.keys m)
191 toEncoding (MSet m) = toEncoding (Map.keys m)
193 mSetFromSet :: Set a -> MSet a
194 mSetFromSet = MSet . Map.fromSet (const ())
196 mSetFromList :: Ord a => [a] -> MSet a
197 mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
199 -- mSetToSet :: Ord a => MSet a -> Set a
200 -- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
201 mSetToSet :: Ord a => MSet a -> Set a
202 mSetToSet = Set.fromList . mSetToList
204 mSetToList :: MSet a -> [a]
205 mSetToList (MSet a) = Map.keys a
207 instance Foldable MSet where
208 foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
210 instance (Ord a, FromJSON a) => FromJSON (MSet a) where
211 parseJSON = fmap mSetFromList . parseJSON
213 instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
215 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
217 ------------------------------------------------------------------------
218 type NgramsTerm = Text
220 data RootParent = RootParent
221 { _rp_root :: NgramsTerm
222 , _rp_parent :: NgramsTerm
224 deriving (Ord, Eq, Show, Generic)
226 deriveJSON (unPrefix "_rp_") ''RootParent
227 makeLenses ''RootParent
229 data NgramsRepoElement = NgramsRepoElement
231 , _nre_list :: ListType
232 --, _nre_root_parent :: Maybe RootParent
233 , _nre_root :: Maybe NgramsTerm
234 , _nre_parent :: Maybe NgramsTerm
235 , _nre_children :: MSet NgramsTerm
237 deriving (Ord, Eq, Show, Generic)
239 deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
240 makeLenses ''NgramsRepoElement
243 NgramsElement { _ne_ngrams :: NgramsTerm
245 , _ne_list :: ListType
246 , _ne_occurrences :: Int
247 , _ne_root :: Maybe NgramsTerm
248 , _ne_parent :: Maybe NgramsTerm
249 , _ne_children :: MSet NgramsTerm
251 deriving (Ord, Eq, Show, Generic)
253 deriveJSON (unPrefix "_ne_") ''NgramsElement
254 makeLenses ''NgramsElement
256 mkNgramsElement :: NgramsTerm -> ListType -> Maybe RootParent -> MSet NgramsTerm -> NgramsElement
257 mkNgramsElement ngrams list rp children =
258 NgramsElement ngrams size list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
261 size = 1 + count " " ngrams
263 newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
264 newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
266 instance ToSchema NgramsElement where
267 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
268 instance Arbitrary NgramsElement where
269 arbitrary = elements [newNgramsElement Nothing "sport"]
271 ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
273 (NgramsElement { _ne_size = s
287 ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
288 ngramsElementFromRepo
297 NgramsElement { _ne_size = s
302 , _ne_ngrams = ngrams
303 , _ne_occurrences = panic $ "API.Ngrams._ne_occurrences"
305 -- Here we could use 0 if we want to avoid any `panic`.
306 -- It will not happen using getTableNgrams if
307 -- getOccByNgramsOnly provides a count of occurrences for
308 -- all the ngrams given.
312 ------------------------------------------------------------------------
313 newtype NgramsTable = NgramsTable [NgramsElement]
314 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
316 type ListNgrams = NgramsTable
318 makePrisms ''NgramsTable
320 -- | Question: why these repetition of Type in this instance
321 -- may you document it please ?
322 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
323 each = _NgramsTable . each
326 -- | TODO Check N and Weight
328 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
329 toNgramsElement ns = map toNgramsElement' ns
331 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
335 Just x -> lookup x mapParent
336 c' = maybe mempty identity $ lookup t mapChildren
337 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
339 mapParent :: Map Int Text
340 mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
342 mapChildren :: Map Text (Set Text)
343 mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
344 $ Map.fromListWith (<>)
345 $ map (first fromJust)
346 $ filter (isJust . fst)
347 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
350 mockTable :: NgramsTable
351 mockTable = NgramsTable
352 [ mkNgramsElement "animal" GraphTerm Nothing (mSetFromList ["dog", "cat"])
353 , mkNgramsElement "cat" GraphTerm (rp "animal") mempty
354 , mkNgramsElement "cats" StopTerm Nothing mempty
355 , mkNgramsElement "dog" GraphTerm (rp "animal") (mSetFromList ["dogs"])
356 , mkNgramsElement "dogs" StopTerm (rp "dog") mempty
357 , mkNgramsElement "fox" GraphTerm Nothing mempty
358 , mkNgramsElement "object" CandidateTerm Nothing mempty
359 , mkNgramsElement "nothing" StopTerm Nothing mempty
360 , mkNgramsElement "organic" GraphTerm Nothing (mSetFromList ["flower"])
361 , mkNgramsElement "flower" GraphTerm (rp "organic") mempty
362 , mkNgramsElement "moon" CandidateTerm Nothing mempty
363 , mkNgramsElement "sky" StopTerm Nothing mempty
366 rp n = Just $ RootParent n n
368 instance Arbitrary NgramsTable where
369 arbitrary = pure mockTable
371 instance ToSchema NgramsTable
373 ------------------------------------------------------------------------
374 type NgramsTableMap = Map NgramsTerm NgramsRepoElement
376 ------------------------------------------------------------------------
377 -- On the Client side:
378 --data Action = InGroup NgramsId NgramsId
379 -- | OutGroup NgramsId NgramsId
380 -- | SetListType NgramsId ListType
382 data PatchSet a = PatchSet
386 deriving (Eq, Ord, Show, Generic)
388 makeLenses ''PatchSet
389 makePrisms ''PatchSet
391 instance ToJSON a => ToJSON (PatchSet a) where
392 toJSON = genericToJSON $ unPrefix "_"
393 toEncoding = genericToEncoding $ unPrefix "_"
395 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
396 parseJSON = genericParseJSON $ unPrefix "_"
399 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
400 arbitrary = PatchSet <$> arbitrary <*> arbitrary
402 type instance Patched (PatchSet a) = Set a
404 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
405 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
407 instance Ord a => Semigroup (PatchSet a) where
408 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
409 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
412 instance Ord a => Monoid (PatchSet a) where
413 mempty = PatchSet mempty mempty
415 instance Ord a => Group (PatchSet a) where
416 invert (PatchSet r a) = PatchSet a r
418 instance Ord a => Composable (PatchSet a) where
419 composable _ _ = undefined
421 instance Ord a => Action (PatchSet a) (Set a) where
422 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
424 instance Applicable (PatchSet a) (Set a) where
425 applicable _ _ = mempty
427 instance Ord a => Validity (PatchSet a) where
428 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
430 instance Ord a => Transformable (PatchSet a) where
431 transformable = undefined
433 conflicts _p _q = undefined
435 transformWith conflict p q = undefined conflict p q
437 instance ToSchema a => ToSchema (PatchSet a)
440 type AddRem = Replace (Maybe ())
442 remPatch, addPatch :: AddRem
443 remPatch = replace (Just ()) Nothing
444 addPatch = replace Nothing (Just ())
446 isRem :: Replace (Maybe ()) -> Bool
447 isRem = (== remPatch)
449 type PatchMap = PM.PatchMap
451 newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
452 deriving (Eq, Show, Generic, Validity, Semigroup, Monoid,
453 Transformable, Composable)
455 type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
456 type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
458 -- TODO this breaks module abstraction
459 makePrisms ''PM.PatchMap
461 makePrisms ''PatchMSet
463 _PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
464 _PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
466 f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
467 f = Map.partition isRem >>> both %~ Map.keysSet
469 g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
470 g (rems, adds) = Map.fromSet (const remPatch) rems
471 <> Map.fromSet (const addPatch) adds
473 instance Ord a => Action (PatchMSet a) (MSet a) where
474 act (PatchMSet p) (MSet m) = MSet $ act p m
476 instance Ord a => Applicable (PatchMSet a) (MSet a) where
477 applicable (PatchMSet p) (MSet m) = applicable p m
479 instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
480 toJSON = toJSON . view _PatchMSetIso
481 toEncoding = toEncoding . view _PatchMSetIso
483 instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
484 parseJSON = fmap (_PatchMSetIso #) . parseJSON
486 instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
487 arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
489 instance ToSchema a => ToSchema (PatchMSet a) where
491 declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
493 type instance Patched (PatchMSet a) = MSet a
495 instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
496 arbitrary = uncurry replace <$> arbitrary
497 -- If they happen to be equal then the patch is Keep.
499 instance ToSchema a => ToSchema (Replace a) where
500 declareNamedSchema (_ :: Proxy (Replace a)) = do
501 -- TODO Keep constructor is not supported here.
502 aSchema <- declareSchemaRef (Proxy :: Proxy a)
503 return $ NamedSchema (Just "Replace") $ mempty
504 & type_ ?~ SwaggerObject
506 InsOrdHashMap.fromList
510 & required .~ [ "old", "new" ]
513 NgramsPatch { _patch_children :: PatchMSet NgramsTerm
514 , _patch_list :: Replace ListType -- TODO Map UserId ListType
516 deriving (Eq, Show, Generic)
518 deriveJSON (unPrefix "_") ''NgramsPatch
519 makeLenses ''NgramsPatch
521 instance ToSchema NgramsPatch where
522 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
524 instance Arbitrary NgramsPatch where
525 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
527 type NgramsPatchIso = PairPatch (PatchMSet NgramsTerm) (Replace ListType)
529 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
530 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
532 instance Semigroup NgramsPatch where
533 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
535 instance Monoid NgramsPatch where
536 mempty = _NgramsPatch # mempty
538 instance Validity NgramsPatch where
539 validate p = p ^. _NgramsPatch . to validate
541 instance Transformable NgramsPatch where
542 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
544 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
546 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
548 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
550 type ConflictResolutionNgramsPatch =
551 ( ConflictResolutionPatchMSet NgramsTerm
552 , ConflictResolutionReplace ListType
554 type instance ConflictResolution NgramsPatch =
555 ConflictResolutionNgramsPatch
557 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
558 -- ~ Patched NgramsPatchIso
559 type instance Patched NgramsPatch = PatchedNgramsPatch
561 instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
562 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
563 applicable p (Just nre) =
564 applicable (p ^. patch_children) (nre ^. nre_children) <>
565 applicable (p ^. patch_list) (nre ^. nre_list)
567 instance Action NgramsPatch NgramsRepoElement where
568 act p = (nre_children %~ act (p ^. patch_children))
569 . (nre_list %~ act (p ^. patch_list))
571 instance Action NgramsPatch (Maybe NgramsRepoElement) where
574 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
575 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
577 instance FromField NgramsTablePatch
579 fromField = fromField'
581 instance FromField (PatchMap NgramsType (PatchMap NodeId NgramsTablePatch))
583 fromField = fromField'
585 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
587 type instance ConflictResolution NgramsTablePatch =
588 NgramsTerm -> ConflictResolutionNgramsPatch
590 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
591 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
592 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
594 makePrisms ''NgramsTablePatch
595 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
596 instance ToSchema NgramsTablePatch
598 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
599 applicable p = applicable (p ^. _NgramsTablePatch)
601 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
603 fmap (execState (reParentNgramsTablePatch p)) .
604 act (p ^. _NgramsTablePatch)
606 instance Arbitrary NgramsTablePatch where
607 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
609 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
610 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
611 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
613 type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
615 reRootChildren :: NgramsTerm -> ReParent NgramsTerm
616 reRootChildren root ngram = do
617 nre <- use $ at ngram
618 forOf_ (_Just . nre_children . folded) nre $ \child -> do
619 at child . _Just . nre_root ?= root
620 reRootChildren root child
622 reParent :: Maybe RootParent -> ReParent NgramsTerm
623 reParent rp child = do
624 at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
625 . (nre_root .~ (_rp_root <$> rp))
627 reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
629 reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
630 reParentAddRem rp child p =
631 reParent (if isRem p then Nothing else Just rp) child
633 reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
634 reParentNgramsPatch parent ngramsPatch = do
635 root_of_parent <- use (at parent . _Just . nre_root)
637 root = fromMaybe parent root_of_parent
638 rp = RootParent { _rp_root = root, _rp_parent = parent }
639 itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
640 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
642 reParentNgramsTablePatch :: ReParent NgramsTablePatch
643 reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
644 -- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
646 ------------------------------------------------------------------------
647 ------------------------------------------------------------------------
650 data Versioned a = Versioned
651 { _v_version :: Version
654 deriving (Generic, Show)
655 deriveJSON (unPrefix "_v_") ''Versioned
656 makeLenses ''Versioned
657 instance ToSchema a => ToSchema (Versioned a) where
658 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_v_")
659 instance Arbitrary a => Arbitrary (Versioned a) where
660 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
663 -- TODO sequencs of modifications (Patchs)
664 type NgramsIdPatch = Patch NgramsId NgramsPatch
666 ngramsPatch :: Int -> NgramsPatch
667 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
669 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
670 toEdit n p = Edit n p
671 ngramsIdPatch :: Patch NgramsId NgramsPatch
672 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
673 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
674 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
677 -- applyPatchBack :: Patch -> IO Patch
678 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
680 ------------------------------------------------------------------------
681 ------------------------------------------------------------------------
682 ------------------------------------------------------------------------
685 -- TODO: Replace.old is ignored which means that if the current list
686 -- `GraphTerm` and that the patch is `Replace CandidateTerm StopTerm` then
687 -- the list is going to be `StopTerm` while it should keep `GraphTerm`.
688 -- However this should not happen in non conflicting situations.
689 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
690 mkListsUpdate nt patches =
691 [ (ngramsTypeId nt, ng, listTypeId lt)
692 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
693 , lt <- patch ^.. patch_list . new
696 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
699 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
700 mkChildrenGroups addOrRem nt patches =
701 [ (ngramsTypeId nt, parent, child)
702 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
703 , child <- patch ^.. patch_children . to addOrRem . folded
707 ngramsTypeFromTabType :: TabType -> NgramsType
708 ngramsTypeFromTabType tabType =
709 let lieu = "Garg.API.Ngrams: " :: Text in
711 Sources -> Ngrams.Sources
712 Authors -> Ngrams.Authors
713 Institutes -> Ngrams.Institutes
714 Terms -> Ngrams.NgramsTerms
715 _ -> panic $ lieu <> "No Ngrams for this tab"
716 -- TODO: This `panic` would disapear with custom NgramsType.
718 ------------------------------------------------------------------------
720 { _r_version :: Version
723 -- first patch in the list is the most recent
727 instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
728 parseJSON = genericParseJSON $ unPrefix "_r_"
730 instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
731 toJSON = genericToJSON $ unPrefix "_r_"
732 toEncoding = genericToEncoding $ unPrefix "_r_"
736 initRepo :: Monoid s => Repo s p
737 initRepo = Repo 1 mempty []
739 type NgramsRepo = Repo NgramsState NgramsStatePatch
740 type NgramsState = Map NgramsType (Map NodeId NgramsTableMap)
741 type NgramsStatePatch = PatchMap NgramsType (PatchMap NodeId NgramsTablePatch)
743 initMockRepo :: NgramsRepo
744 initMockRepo = Repo 1 s []
746 s = Map.singleton Ngrams.NgramsTerms
747 $ Map.singleton 47254
749 [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
751 data RepoEnv = RepoEnv
752 { _renv_var :: !(MVar NgramsRepo)
753 , _renv_saver :: !(IO ())
754 , _renv_lock :: !FileLock
760 class HasRepoVar env where
761 repoVar :: Getter env (MVar NgramsRepo)
763 instance HasRepoVar (MVar NgramsRepo) where
766 class HasRepoSaver env where
767 repoSaver :: Getter env (IO ())
769 class (HasRepoVar env, HasRepoSaver env) => HasRepo env where
770 repoEnv :: Getter env RepoEnv
772 instance HasRepo RepoEnv where
775 instance HasRepoVar RepoEnv where
778 instance HasRepoSaver RepoEnv where
779 repoSaver = renv_saver
781 type RepoCmdM env err m =
787 ------------------------------------------------------------------------
789 saveRepo :: ( MonadReader env m, MonadIO m, HasRepoSaver env )
791 saveRepo = liftIO =<< view repoSaver
793 listTypeConflictResolution :: ListType -> ListType -> ListType
794 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
796 ngramsStatePatchConflictResolution
797 :: NgramsType -> NodeId -> NgramsTerm
798 -> ConflictResolutionNgramsPatch
799 ngramsStatePatchConflictResolution _ngramsType _nodeId _ngramsTerm
801 -- undefined {- TODO think this through -}, listTypeConflictResolution)
804 -- Insertions are not considered as patches,
805 -- they do not extend history,
806 -- they do not bump version.
807 insertNewOnly :: a -> Maybe b -> a
808 insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
809 -- TODO error handling
811 something :: Monoid a => Maybe a -> a
812 something Nothing = mempty
813 something (Just a) = a
816 -- TODO refactor with putListNgrams
817 copyListNgrams :: RepoCmdM env err m
818 => NodeId -> NodeId -> NgramsType
820 copyListNgrams srcListId dstListId ngramsType = do
822 liftIO $ modifyMVar_ var $
823 pure . (r_state . at ngramsType %~ (Just . f . something))
826 f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
827 f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
829 -- TODO refactor with putListNgrams
830 -- The list must be non-empty!
831 -- The added ngrams must be non-existent!
832 addListNgrams :: RepoCmdM env err m
833 => NodeId -> NgramsType
834 -> [NgramsElement] -> m ()
835 addListNgrams listId ngramsType nes = do
837 liftIO $ modifyMVar_ var $
838 pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
841 m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
844 -- If the given list of ngrams elements contains ngrams already in
845 -- the repo, they will be ignored.
846 putListNgrams :: RepoCmdM env err m
847 => NodeId -> NgramsType
848 -> [NgramsElement] -> m ()
849 putListNgrams _ _ [] = pure ()
850 putListNgrams listId ngramsType nes = do
851 -- printDebug "putListNgrams" (length nes)
853 liftIO $ modifyMVar_ var $
854 pure . (r_state . at ngramsType %~ (Just . (at listId %~ (Just . (<> m) . something)) . something))
857 m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
860 tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
861 tableNgramsPost tabType listId mayList =
862 putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
864 currentVersion :: RepoCmdM env err m => m Version
867 r <- liftIO $ readMVar var
868 pure $ r ^. r_version
870 tableNgramsPull :: RepoCmdM env err m
871 => ListId -> NgramsType
873 -> m (Versioned NgramsTablePatch)
874 tableNgramsPull listId ngramsType p_version = do
876 r <- liftIO $ readMVar var
879 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
880 q_table = q ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
882 pure (Versioned (r ^. r_version) q_table)
884 -- Apply the given patch to the DB and returns the patch to be applied on the
887 tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
889 -> Versioned NgramsTablePatch
890 -> m (Versioned NgramsTablePatch)
891 tableNgramsPut tabType listId (Versioned p_version p_table)
892 | p_table == mempty = do
893 let ngramsType = ngramsTypeFromTabType tabType
894 tableNgramsPull listId ngramsType p_version
897 let ngramsType = ngramsTypeFromTabType tabType
898 (p0, p0_validity) = PM.singleton listId p_table
899 (p, p_validity) = PM.singleton ngramsType p0
901 assertValid p0_validity
902 assertValid p_validity
905 vq' <- liftIO $ modifyMVar var $ \r -> do
907 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
908 (p', q') = transformWith ngramsStatePatchConflictResolution p q
909 r' = r & r_version +~ 1
911 & r_history %~ (p' :)
912 q'_table = q' ^. _PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just
914 -- Ideally we would like to check these properties. However:
915 -- * They should be checked only to debug the code. The client data
916 -- should be able to trigger these.
917 -- * What kind of error should they throw (we are in IO here)?
918 -- * Should we keep modifyMVar?
919 -- * Should we throw the validation in an Exception, catch it around
920 -- modifyMVar and throw it back as an Error?
921 assertValid $ transformable p q
922 assertValid $ applicable p' (r ^. r_state)
924 pure (r', Versioned (r' ^. r_version) q'_table)
929 mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
930 mergeNgramsElement _neOld neNew = neNew
932 { _ne_list :: ListType
933 If we merge the parents/children we can potentially create cycles!
934 , _ne_parent :: Maybe NgramsTerm
935 , _ne_children :: MSet NgramsTerm
939 getNgramsTableMap :: RepoCmdM env err m
940 => NodeId -> NgramsType -> m (Versioned NgramsTableMap)
941 getNgramsTableMap nodeId ngramsType = do
943 repo <- liftIO $ readMVar v
944 pure $ Versioned (repo ^. r_version)
945 (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
950 -- | TODO Errors management
951 -- TODO: polymorphic for Annuaire or Corpus or ...
952 -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
953 -- TODO: should take only one ListId
955 getTime' :: MonadIO m => m TimeSpec
956 getTime' = liftIO $ getTime ProcessCPUTime
959 getTableNgrams :: forall env err m.
960 (RepoCmdM env err m, HasNodeError err, HasConnection env)
961 => NodeType -> NodeId -> TabType
962 -> ListId -> Limit -> Maybe Offset
964 -> Maybe MinSize -> Maybe MaxSize
966 -> (NgramsTerm -> Bool)
967 -> m (Versioned NgramsTable)
968 getTableNgrams _nType nId tabType listId limit_ offset
969 listType minSize maxSize orderBy searchQuery = do
972 -- lIds <- selectNodesWithUsername NodeList userMaster
974 ngramsType = ngramsTypeFromTabType tabType
975 offset' = maybe 0 identity offset
976 listType' = maybe (const True) (==) listType
977 minSize' = maybe (const True) (<=) minSize
978 maxSize' = maybe (const True) (>=) maxSize
980 selected_node n = minSize' s
982 && searchQuery (n ^. ne_ngrams)
983 && listType' (n ^. ne_list)
987 selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
989 ---------------------------------------
990 sortOnOrder Nothing = identity
991 sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
992 sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
993 sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
994 sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
996 ---------------------------------------
997 selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
998 selectAndPaginate tableMap = roots <> inners
1000 list = tableMap ^.. each
1001 rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. at r))
1003 selected_nodes = list & take limit_
1005 . filter selected_node
1006 . sortOnOrder orderBy
1007 roots = rootOf <$> selected_nodes
1008 rootsSet = Set.fromList (_ne_ngrams <$> roots)
1009 inners = list & filter (selected_inner rootsSet)
1011 ---------------------------------------
1012 setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
1013 setScores False table = pure table
1014 setScores True table = do
1015 let ngrams_terms = (table ^.. each . ne_ngrams)
1017 occurrences <- getOccByNgramsOnlyFast' nId
1022 liftIO $ hprint stderr
1023 ("getTableNgrams/setScores #ngrams=" % int % " time=" % timeSpecs % "\n")
1024 (length ngrams_terms) t1 t2
1026 occurrences <- getOccByNgramsOnlySlow nType nId
1032 setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
1034 pure $ table & each %~ setOcc
1035 ---------------------------------------
1037 -- lists <- catMaybes <$> listsWith userMaster
1038 -- trace (show lists) $
1039 -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
1041 let scoresNeeded = needsScores orderBy
1042 tableMap1 <- getNgramsTableMap listId ngramsType
1044 tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
1045 . Map.mapWithKey ngramsElementFromRepo
1047 tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
1048 . setScores (not scoresNeeded)
1051 liftIO $ hprint stderr
1052 ("getTableNgrams total=" % timeSpecs
1053 % " map1=" % timeSpecs
1054 % " map2=" % timeSpecs
1055 % " map3=" % timeSpecs
1056 % " sql=" % (if scoresNeeded then "map2" else "map3")
1058 ) t0 t3 t0 t1 t1 t2 t2 t3
1064 -- TODO: find a better place for the code above, All APIs stay here
1065 type QueryParamR = QueryParam' '[Required, Strict]
1068 data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
1069 deriving (Generic, Enum, Bounded, Read, Show)
1071 instance FromHttpApiData OrderBy
1073 parseUrlPiece "TermAsc" = pure TermAsc
1074 parseUrlPiece "TermDesc" = pure TermDesc
1075 parseUrlPiece "ScoreAsc" = pure ScoreAsc
1076 parseUrlPiece "ScoreDesc" = pure ScoreDesc
1077 parseUrlPiece _ = Left "Unexpected value of OrderBy"
1079 instance ToParamSchema OrderBy
1080 instance FromJSON OrderBy
1081 instance ToJSON OrderBy
1082 instance ToSchema OrderBy
1083 instance Arbitrary OrderBy
1085 arbitrary = elements [minBound..maxBound]
1087 needsScores :: Maybe OrderBy -> Bool
1088 needsScores (Just ScoreAsc) = True
1089 needsScores (Just ScoreDesc) = True
1090 needsScores _ = False
1092 type TableNgramsApiGet = Summary " Table Ngrams API Get"
1093 :> QueryParamR "ngramsType" TabType
1094 :> QueryParamR "list" ListId
1095 :> QueryParamR "limit" Limit
1096 :> QueryParam "offset" Offset
1097 :> QueryParam "listType" ListType
1098 :> QueryParam "minTermSize" MinSize
1099 :> QueryParam "maxTermSize" MaxSize
1100 :> QueryParam "orderBy" OrderBy
1101 :> QueryParam "search" Text
1102 :> Get '[JSON] (Versioned NgramsTable)
1104 type TableNgramsApiPut = Summary " Table Ngrams API Change"
1105 :> QueryParamR "ngramsType" TabType
1106 :> QueryParamR "list" ListId
1107 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
1108 :> Put '[JSON] (Versioned NgramsTablePatch)
1110 type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
1111 :> QueryParamR "ngramsType" TabType
1112 :> QueryParamR "list" ListId
1113 :> QueryParam "listType" ListType
1114 :> ReqBody '[JSON] [NgramsTerm]
1117 type TableNgramsApi = TableNgramsApiGet
1118 :<|> TableNgramsApiPut
1119 :<|> TableNgramsApiPost
1121 getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1122 => NodeId -> TabType
1123 -> ListId -> Limit -> Maybe Offset
1125 -> Maybe MinSize -> Maybe MaxSize
1127 -> Maybe Text -- full text search
1128 -> m (Versioned NgramsTable)
1129 getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
1130 getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1132 searchQuery = maybe (const True) isInfixOf mt
1134 -- | Text search is deactivated for now for ngrams by doc only
1135 getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
1137 -> ListId -> Limit -> Maybe Offset
1139 -> Maybe MinSize -> Maybe MaxSize
1141 -> Maybe Text -- full text search
1142 -> m (Versioned NgramsTable)
1143 getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
1144 ns <- selectNodesWithUsername NodeList userMaster
1145 let ngramsType = ngramsTypeFromTabType tabType
1146 ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
1147 let searchQuery = flip S.member (S.fromList ngs)
1148 getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
1152 apiNgramsTableCorpus :: ( RepoCmdM env err m
1154 , HasInvalidError err
1157 => NodeId -> ServerT TableNgramsApi m
1158 apiNgramsTableCorpus cId = getTableNgramsCorpus cId
1160 :<|> tableNgramsPost
1163 apiNgramsTableDoc :: ( RepoCmdM env err m
1165 , HasInvalidError err
1168 => DocId -> ServerT TableNgramsApi m
1169 apiNgramsTableDoc dId = getTableNgramsDoc dId
1171 :<|> tableNgramsPost
1172 -- > add new ngrams in database (TODO AD)
1173 -- > index all the corpus accordingly (TODO AD)
1175 listNgramsChangedSince :: RepoCmdM env err m => ListId -> NgramsType -> Version -> m (Versioned Bool)
1176 listNgramsChangedSince listId ngramsType version
1178 Versioned <$> currentVersion <*> pure True
1180 tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)