{-|
Module : Gargantext.API.Node.Corpus.Export
-Description : Get Metrics from Storage (Database like)
+Description : Corpus export
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
- lists
-}
-{-# LANGUAGE TemplateHaskell #-}
-{-# LANGUAGE TypeOperators #-}
-
module Gargantext.API.Node.Corpus.Export
where
-import Data.Aeson.TH (deriveJSON)
-import qualified Data.List as List
-import qualified Data.Map as Map
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
-import qualified Data.Set as Set
-import Data.Swagger
import Data.Text (Text)
-import GHC.Generics (Generic)
-import Servant
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.HashMap.Strict as HashMap
-import Gargantext.API.Ngrams
+import Gargantext.API.Node.Corpus.Export.Types
+import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
+import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Prelude (GargNoServer)
-import Gargantext.Core.Types --
-import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
-import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
+import Gargantext.Prelude.Crypto.Hash (hash)
+import Gargantext.Core.Types
+import Gargantext.Core.NodeStory
+import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
-import Gargantext.Database.Admin.Types.Node (Node, NodeId, ListId, CorpusId)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
-import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
-import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
-import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
+import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
+import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
+import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
import Gargantext.Prelude
-import Gargantext.Prelude.Utils (hashFromSet, hashFromList)
-
-
--- Corpus Export
-data Corpus =
- Corpus { _c_corpus :: [Document]
- , _c_hash :: Hash
- } deriving (Generic)
-
--- | Document Export
-data Document =
- Document { _d_document :: Node HyperdataDocument
- , _d_ngrams :: Ngrams
- , _d_hash :: Hash
- } deriving (Generic)
-
-data Ngrams =
- Ngrams { _ng_ngrams :: [Text]
- , _ng_hash :: Hash
- } deriving (Generic)
-
-type Hash = Text
--------
-instance ToSchema Corpus where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
-
-instance ToSchema Document where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
-
-instance ToSchema Ngrams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
-
--------
-instance ToParamSchema Corpus where
- toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-
-instance ToParamSchema Document where
- toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-
-instance ToParamSchema Ngrams where
- toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
---------------------------------------------------
-type API = Summary "Corpus Export"
- :> "export"
- :> QueryParam "listId" ListId
- :> QueryParam "ngramsType" NgramsType
- :> Get '[JSON] Corpus
--------------------------------------------------
-- | Hashes are ordered by Set
Nothing -> NgramsTerms
Just t -> t
+ listId <- case lId of
+ Nothing -> defaultList cId
+ Just l -> pure l
+
ns <- Map.fromList
- <$> map (\n -> (_node_id n, n))
+ <$> map (\n -> (_context_id n, n))
<$> selectDocNodes cId
- repo <- getRepo
- ngs <- getNodeNgrams cId lId nt repo
+
+ repo <- getRepo [listId]
+ ngs <- getContextNgrams cId listId MapTerm nt repo
let -- uniqId is hash computed already for each document imported in database
- r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hashFromSet b)) (d_hash a b)
- ) ns ngs
+ r = Map.intersectionWith
+ (\a b -> DocumentExport.Document { _d_document = context2node a
+ , _d_ngrams = DocumentExport.Ngrams (Set.toList b) (hash b)
+ , _d_hash = d_hash a b }
+ ) ns (Map.map (Set.map unNgramsTerm) ngs)
where
- d_hash a b = hashFromList [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
- , hashFromSet b
- ]
- pure $ Corpus (Map.elems r) (hashFromList $ List.map _d_hash
- $ Map.elems r
- )
-
-getNodeNgrams :: HasNodeError err
+ d_hash :: Context HyperdataDocument -> Set Text -> Text
+ d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
+ , hash b
+ ]
+ pure $ Corpus { _c_corpus = Map.elems r
+ , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
+
+getContextNgrams :: HasNodeError err
=> CorpusId
- -> Maybe ListId
+ -> ListId
+ -> ListType
-> NgramsType
- -> NgramsRepo
- -> Cmd err (Map NodeId (Set Text))
-getNodeNgrams cId lId' nt repo = do
- lId <- case lId' of
- Nothing -> defaultList cId
- Just l -> pure l
+ -> NodeListStory
+ -> Cmd err (Map ContextId (Set NgramsTerm))
+getContextNgrams cId lId listType nt repo = do
+-- lId <- case lId' of
+-- Nothing -> defaultList cId
+-- Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
- let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
- r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
+ let ngs = filterListWithRoot [listType] $ mapTermListRoot [lId] nt repo
+ -- TODO HashMap
+ r <- getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
pure r
-
-$(deriveJSON (unPrefix "_c_") ''Corpus)
-$(deriveJSON (unPrefix "_d_") ''Document)
-$(deriveJSON (unPrefix "_ng_") ''Ngrams)
-
-
-- TODO
-- Exports List
-- Version number of the list
-
-