2 Module : Gargantext.API.Ngrams
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
13 get ngrams filtered by NgramsType
18 {-# LANGUAGE ConstraintKinds #-}
19 {-# LANGUAGE DataKinds #-}
20 {-# LANGUAGE DeriveGeneric #-}
21 {-# LANGUAGE NoImplicitPrelude #-}
22 {-# LANGUAGE OverloadedStrings #-}
23 {-# LANGUAGE ScopedTypeVariables #-}
24 {-# LANGUAGE TemplateHaskell #-}
25 {-# LANGUAGE TypeOperators #-}
26 {-# LANGUAGE FlexibleInstances #-}
27 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
29 {-# LANGUAGE RankNTypes #-}
30 {-# LANGUAGE TypeFamilies #-}
31 {-# OPTIONS -fno-warn-orphans #-}
33 module Gargantext.API.Ngrams
36 import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound, round)
37 -- import Gargantext.Database.Schema.User (UserId)
38 import Data.Functor (($>))
39 import Data.Patch.Class (Replace, replace, Action(act), Applicable(..),
40 Composable(..), Group(..), Transformable(..),
41 PairPatch(..), Patched, ConflictResolution,
42 ConflictResolutionReplace,
43 SimpleConflictResolution')
44 import qualified Data.Map.Strict.Patch as PM
46 --import Data.Semigroup
48 import qualified Data.Set as Set
49 import Data.Maybe (isJust)
50 import Data.Tuple.Extra (first)
51 -- import qualified Data.Map.Strict as DM
52 import Data.Map.Strict (Map, mapKeys, fromListWith)
53 --import qualified Data.Set as Set
54 import Control.Concurrent
55 import Control.Lens (makeLenses, makePrisms, Getter, Prism', prism', Iso', iso, (^..), (.~), (#), to, {-withIndex, folded, ifolded,-} view, (^.), (+~), (%~), at, _Just, Each(..), dropping, taking)
56 import Control.Monad (guard)
57 import Control.Monad.Error.Class (MonadError, throwError)
58 import Control.Monad.Reader
60 import Data.Aeson.TH (deriveJSON)
61 import Data.Either(Either(Left))
62 import Data.Map (lookup)
63 import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
64 import Data.Swagger hiding (version, patch)
65 import Data.Text (Text)
67 import GHC.Generics (Generic)
68 import Gargantext.Core.Utils.Prefix (unPrefix)
69 import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
70 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId)
71 import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTableData(..))
72 import qualified Gargantext.Database.Schema.Ngrams as Ngrams
73 -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
74 import Gargantext.Database.Utils (CmdM)
75 import Gargantext.Prelude
76 -- import Gargantext.Core.Types (ListTypeId, listTypeId)
77 import Gargantext.Core.Types (ListType(..), ListId, CorpusId, Limit, Offset)
78 import Servant hiding (Patch)
79 import Test.QuickCheck (elements)
80 import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
82 ------------------------------------------------------------------------
83 --data FacetFormat = Table | Chart
84 data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
86 deriving (Generic, Enum, Bounded)
88 instance FromHttpApiData TabType
90 parseUrlPiece "Docs" = pure Docs
91 parseUrlPiece "Terms" = pure Terms
92 parseUrlPiece "Sources" = pure Sources
93 parseUrlPiece "Institutes" = pure Institutes
94 parseUrlPiece "Authors" = pure Authors
95 parseUrlPiece "Trash" = pure Trash
97 parseUrlPiece "Contacts" = pure Contacts
99 parseUrlPiece _ = Left "Unexpected value of TabType"
101 instance ToParamSchema TabType
102 instance ToJSON TabType
103 instance FromJSON TabType
104 instance ToSchema TabType
105 instance Arbitrary TabType
107 arbitrary = elements [minBound .. maxBound]
109 ------------------------------------------------------------------------
110 type NgramsTerm = Text
113 NgramsElement { _ne_ngrams :: NgramsTerm
114 , _ne_list :: ListType
115 , _ne_occurrences :: Int
116 , _ne_parent :: Maybe NgramsTerm
117 , _ne_children :: Set NgramsTerm
119 deriving (Ord, Eq, Show, Generic)
121 deriveJSON (unPrefix "_ne_") ''NgramsElement
122 makeLenses ''NgramsElement
124 instance ToSchema NgramsElement
125 instance Arbitrary NgramsElement where
126 arbitrary = elements [NgramsElement "sport" GraphList 1 Nothing mempty]
128 ------------------------------------------------------------------------
129 newtype NgramsTable = NgramsTable [NgramsElement]
130 deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
132 makePrisms ''NgramsTable
134 instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
135 each = _NgramsTable . each
138 -- | TODO Check N and Weight
139 toNgramsElement :: [NgramsTableData] -> [NgramsElement]
140 toNgramsElement ns = map toNgramsElement' ns
142 toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
146 Just x -> lookup x mapParent
147 c' = maybe mempty identity $ lookup t mapChildren
148 lt' = maybe (panic "API.Ngrams: listypeId") identity lt
150 mapParent :: Map Int Text
151 mapParent = fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
153 mapChildren :: Map Text (Set Text)
154 mapChildren = mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
156 $ map (first fromJust)
157 $ filter (isJust . fst)
158 $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
161 instance Arbitrary NgramsTable where
164 [ NgramsElement "animal" GraphList 1 Nothing (Set.fromList ["dog", "cat"])
165 , NgramsElement "cat" GraphList 1 (Just "animal") mempty
166 , NgramsElement "cats" StopList 4 Nothing mempty
167 , NgramsElement "dog" GraphList 3 (Just "animal")(Set.fromList ["dogs"])
168 , NgramsElement "dogs" StopList 4 (Just "dog") mempty
169 , NgramsElement "fox" GraphList 1 Nothing mempty
170 , NgramsElement "object" CandidateList 2 Nothing mempty
171 , NgramsElement "nothing" StopList 4 Nothing mempty
172 , NgramsElement "organic" GraphList 3 Nothing (Set.singleton "flower")
173 , NgramsElement "flower" GraphList 3 (Just "organic") mempty
174 , NgramsElement "moon" CandidateList 1 Nothing mempty
175 , NgramsElement "sky" StopList 1 Nothing mempty
178 instance ToSchema NgramsTable
180 ------------------------------------------------------------------------
181 type NgramsTableMap = Map NgramsTerm NgramsElement
183 ------------------------------------------------------------------------
184 -- On the Client side:
185 --data Action = InGroup NgramsId NgramsId
186 -- | OutGroup NgramsId NgramsId
187 -- | SetListType NgramsId ListType
189 data PatchSet a = PatchSet
193 deriving (Eq, Ord, Show, Generic)
195 makeLenses ''PatchSet
197 instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
198 arbitrary = PatchSet <$> arbitrary <*> arbitrary
200 type instance Patched (PatchSet a) = Set a
202 type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
203 type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
205 instance Ord a => Semigroup (PatchSet a) where
206 p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
207 , _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
210 instance Ord a => Monoid (PatchSet a) where
211 mempty = PatchSet mempty mempty
213 instance Ord a => Group (PatchSet a) where
214 invert (PatchSet r a) = PatchSet a r
216 instance Ord a => Composable (PatchSet a) where
217 composable _ _ = mempty
219 instance Ord a => Action (PatchSet a) (Set a) where
220 act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
222 instance Applicable (PatchSet a) (Set a) where
223 applicable _ _ = mempty
225 instance Ord a => Validity (PatchSet a) where
226 validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
228 instance Ord a => Transformable (PatchSet a) where
229 transformable = undefined
231 conflicts _p _q = undefined
233 transformWith conflict p q = undefined conflict p q
235 instance ToJSON a => ToJSON (PatchSet a) where
236 toJSON = genericToJSON $ unPrefix "_"
237 toEncoding = genericToEncoding $ unPrefix "_"
239 instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
240 parseJSON = genericParseJSON $ unPrefix "_"
242 instance ToSchema a => ToSchema (PatchSet a)
244 instance ToSchema a => ToSchema (Replace a) where
245 declareNamedSchema (_ :: proxy (Replace a)) = do
246 -- TODO Keep constructor is not supported here.
247 aSchema <- declareSchemaRef (Proxy :: Proxy a)
248 return $ NamedSchema (Just "Replace") $ mempty
249 & type_ .~ SwaggerObject
251 InsOrdHashMap.fromList
255 & required .~ [ "old", "new" ]
258 NgramsPatch { _patch_children :: PatchSet NgramsTerm
259 , _patch_list :: Replace ListType -- TODO Map UserId ListType
261 deriving (Ord, Eq, Show, Generic)
263 deriveJSON (unPrefix "_") ''NgramsPatch
264 makeLenses ''NgramsPatch
266 instance ToSchema NgramsPatch
268 instance Arbitrary NgramsPatch where
269 arbitrary = NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)
271 type NgramsPatchIso = PairPatch (PatchSet NgramsTerm) (Replace ListType)
273 _NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
274 _NgramsPatch = iso (\(NgramsPatch c l) -> c :*: l) (\(c :*: l) -> NgramsPatch c l)
276 instance Semigroup NgramsPatch where
277 p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
279 instance Monoid NgramsPatch where
280 mempty = _NgramsPatch # mempty
282 instance Validity NgramsPatch where
283 validate p = p ^. _NgramsPatch . to validate
285 instance Transformable NgramsPatch where
286 transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
288 conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
290 transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
292 (p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
294 type ConflictResolutionNgramsPatch =
295 ( ConflictResolutionPatchSet NgramsTerm
296 , ConflictResolutionReplace ListType
298 type instance ConflictResolution NgramsPatch =
299 ConflictResolutionNgramsPatch
301 type PatchedNgramsPatch = (Set NgramsTerm, ListType)
302 -- ~ Patched NgramsPatchIso
303 type instance Patched NgramsPatch = PatchedNgramsPatch
305 instance Applicable NgramsPatch (Maybe NgramsElement) where
306 applicable p Nothing = check (p == mempty) "NgramsPatch should be empty here"
307 applicable p (Just ne) =
308 -- TODO how to patch _ne_parent ?
309 applicable (p ^. patch_children) (ne ^. ne_children) <>
310 applicable (p ^. patch_list) (ne ^. ne_list)
312 instance Action NgramsPatch (Maybe NgramsElement) where
313 act _ Nothing = Nothing
315 -- TODO how to patch _ne_parent ?
316 ne & ne_children %~ act (p ^. patch_children)
317 & ne_list %~ act (p ^. patch_list)
320 type PatchMap = PM.PatchMap
322 newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
323 deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
325 --instance (Ord k, Action pv (Maybe v)) => Action (PatchMap k pv) (Map k v) where
327 type instance ConflictResolution NgramsTablePatch =
328 NgramsTerm -> ConflictResolutionNgramsPatch
330 type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
331 -- ~ Patched (PatchMap NgramsTerm NgramsPatch)
332 type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
334 makePrisms ''NgramsTablePatch
335 instance ToSchema (PatchMap NgramsTerm NgramsPatch)
336 instance ToSchema NgramsTablePatch
338 instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
339 applicable p = applicable (p ^. _NgramsTablePatch)
341 instance Action NgramsTablePatch (Maybe NgramsTableMap) where
342 act p = act (p ^. _NgramsTablePatch)
343 -- (v ^? _Just . _NgramsTable)
344 -- ^? _Just . from _NgramsTable
346 instance Arbitrary NgramsTablePatch where
347 arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
349 -- Should it be less than an Lens' to preserve PatchMap's abstraction.
350 -- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
351 -- ntp_ngrams_patches = _NgramsTablePatch . undefined
353 -- TODO: replace by mempty once we have the Monoid instance
354 emptyNgramsTablePatch :: NgramsTablePatch
355 emptyNgramsTablePatch = NgramsTablePatch mempty
357 ------------------------------------------------------------------------
358 ------------------------------------------------------------------------
361 data Versioned a = Versioned
362 { _v_version :: Version
366 deriveJSON (unPrefix "_v_") ''Versioned
367 makeLenses ''Versioned
368 instance ToSchema a => ToSchema (Versioned a)
369 instance Arbitrary a => Arbitrary (Versioned a) where
370 arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
373 -- TODO sequencs of modifications (Patchs)
374 type NgramsIdPatch = Patch NgramsId NgramsPatch
376 ngramsPatch :: Int -> NgramsPatch
377 ngramsPatch n = NgramsPatch (DM.fromList [(1, StopList)]) (Set.fromList [n]) Set.empty
379 toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
380 toEdit n p = Edit n p
381 ngramsIdPatch :: Patch NgramsId NgramsPatch
382 ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
383 , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
384 , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
387 -- applyPatchBack :: Patch -> IO Patch
388 -- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
390 ------------------------------------------------------------------------
391 ------------------------------------------------------------------------
392 ------------------------------------------------------------------------
394 type TableNgramsApiGet = Summary " Table Ngrams API Get"
395 :> QueryParam "ngramsType" TabType
396 :> QueryParam "list" ListId
397 :> QueryParam "limit" Limit
398 :> QueryParam "offset" Offset
399 :> Get '[JSON] (Versioned NgramsTable)
401 type TableNgramsApi = Summary " Table Ngrams API Change"
402 :> QueryParam "ngramsType" TabType
403 :> QueryParam "list" ListId
404 :> ReqBody '[JSON] (Versioned NgramsTablePatch)
405 :> Put '[JSON] (Versioned NgramsTablePatch)
407 data NgramError = UnsupportedVersion
410 class HasNgramError e where
411 _NgramError :: Prism' e NgramError
413 instance HasNgramError ServantErr where
414 _NgramError = prism' make match
416 err = err500 { errBody = "NgramError: Unsupported version" }
417 make UnsupportedVersion = err
418 match e = guard (e == err) $> UnsupportedVersion
420 ngramError :: (MonadError e m, HasNgramError e) => NgramError -> m a
421 ngramError nne = throwError $ _NgramError # nne
424 -- TODO: Replace.old is ignored which means that if the current list
425 -- `GraphList` and that the patch is `Replace CandidateList StopList` then
426 -- the list is going to be `StopList` while it should keep `GraphList`.
427 -- However this should not happen in non conflicting situations.
428 mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
429 mkListsUpdate nt patches =
430 [ (ngramsTypeId nt, ng, listTypeId lt)
431 | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
432 , lt <- patch ^.. patch_list . new
435 mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
438 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
439 mkChildrenGroups addOrRem nt patches =
440 [ (ngramsTypeId nt, parent, child)
441 | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
442 , child <- patch ^.. patch_children . to addOrRem . folded
446 ngramsTypeFromTabType :: Maybe TabType -> NgramsType
447 ngramsTypeFromTabType maybeTabType =
448 let lieu = "Garg.API.Ngrams: " :: Text in
450 Nothing -> panic (lieu <> "Indicate the Table")
451 Just tab -> case tab of
452 Sources -> Ngrams.Sources
453 Authors -> Ngrams.Authors
454 Institutes -> Ngrams.Institutes
455 Terms -> Ngrams.NgramsTerms
456 _ -> panic $ lieu <> "No Ngrams for this tab"
458 ------------------------------------------------------------------------
460 { _r_version :: Version
463 -- ^ first patch in the list is the most recent
468 initRepo :: Monoid s => Repo s p
469 initRepo = Repo 1 mempty []
471 type NgramsState = Map ListId (Map NgramsType NgramsTableMap)
472 type NgramsStatePatch = PatchMap ListId (PatchMap NgramsType NgramsTablePatch)
473 type NgramsRepo = Repo NgramsState NgramsStatePatch
475 class HasRepoVar env where
476 repoVar :: Getter env (MVar NgramsRepo)
478 instance HasRepoVar (MVar NgramsRepo) where
481 type RepoCmdM env err m =
486 ------------------------------------------------------------------------
488 listTypeConflictResolution :: ListType -> ListType -> ListType
489 listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
491 ngramsStatePatchConflictResolution
492 :: ListId -> NgramsType -> NgramsTerm
493 -> ConflictResolutionNgramsPatch
494 ngramsStatePatchConflictResolution _listId _ngramsType _ngramsTerm
495 = ((<>) {- TODO think this through -}, listTypeConflictResolution)
497 makePrisms ''PM.PatchMap
499 class HasInvalidError e where
500 _InvalidError :: Prism' e Validation
502 instance HasInvalidError ServantErr where
503 _InvalidError = undefined {-prism' make match
505 err = err500 { errBody = "InvalidError" }
507 match e = guard (e == err) $> UnsupportedVersion-}
509 assertValid :: (MonadError e m, HasInvalidError e) => Validation -> m ()
510 assertValid v = when (not $ validationIsValid v) $ throwError $ _InvalidError # v
512 -- Apply the given patch to the DB and returns the patch to be applied on the
515 -- In this perliminary version the OT aspect is missing, therefore the version
516 -- number is always 1 and the returned patch is always empty.
517 tableNgramsPatch :: (HasNgramError err, HasNodeError err, HasInvalidError err,
519 => CorpusId -> Maybe TabType -> Maybe ListId
520 -> Versioned NgramsTablePatch
521 -> m (Versioned NgramsTablePatch)
522 tableNgramsPatch corpusId maybeTabType maybeList (Versioned p_version p_table) = do
523 let ngramsType = ngramsTypeFromTabType maybeTabType
524 listId <- maybe (defaultList corpusId) pure maybeList
525 let (p0, p0_validity) = PM.singleton ngramsType p_table
526 let (p, p_validity) = PM.singleton listId p0
528 assertValid p0_validity
529 assertValid p_validity
532 (p'_applicable, vq') <- liftIO $ modifyMVar var $ \r ->
534 q = mconcat $ take (r ^. r_version - p_version) (r ^. r_history)
535 (p', q') = transformWith ngramsStatePatchConflictResolution p q
536 r' = r & r_version +~ 1
538 & r_history %~ (p' :)
539 q'_table = q' ^. _PatchMap . at listId . _Just . _PatchMap . at ngramsType . _Just
540 p'_applicable = applicable p' (r ^. r_state)
542 pure (r', (p'_applicable, Versioned (r' ^. r_version) q'_table))
543 assertValid p'_applicable
547 when (version /= 1) $ ngramError UnsupportedVersion
548 updateNodeNgrams $ NodeNgramsUpdate
549 { _nnu_user_list_id = listId
550 , _nnu_lists_update = mkListsUpdate ngramsType patch
551 , _nnu_rem_children = mkChildrenGroups _rem ngramsType patch
552 , _nnu_add_children = mkChildrenGroups _add ngramsType patch
554 pure $ Versioned 1 emptyNgramsTablePatch
557 -- | TODO Errors management
558 -- TODO: polymorphic for Annuaire or Corpus or ...
559 getTableNgrams :: RepoCmdM env err m
560 => CorpusId -> Maybe TabType
561 -> Maybe ListId -> Maybe Limit -> Maybe Offset
562 -- -> Maybe MinSize -> Maybe MaxSize
564 -- -> Maybe Text -- full text search
565 -> m (Versioned NgramsTable)
566 getTableNgrams cId maybeTabType maybeListId mlimit moffset = do
567 let ngramsType = ngramsTypeFromTabType maybeTabType
568 listId <- maybe (defaultList cId) pure maybeListId
571 defaultLimit = 10 -- TODO
572 limit_ = maybe defaultLimit identity mlimit
573 offset_ = maybe 0 identity moffset
576 repo <- liftIO $ readMVar v
578 let ngrams = repo ^.. r_state
580 . at ngramsType . _Just
581 . taking limit_ (dropping offset_ each)
583 pure $ Versioned (repo ^. r_version) (NgramsTable ngrams)
587 Ngrams.getNgramsTableDb NodeDocument ngramsType (Ngrams.NgramsTableParam listId cId) limit_ offset_
589 -- printDebug "ngramsTableDatas" ngramsTableDatas
591 pure $ Versioned 1 $ NgramsTable (toNgramsElement ngramsTableDatas)