- lists
-}
-{-# LANGUAGE TemplateHaskell #-}
-{-# LANGUAGE TypeOperators #-}
-
module Gargantext.API.Node.Corpus.Export
where
-import Data.Aeson.TH (deriveJSON)
-import qualified Data.List as List
-import qualified Data.Map as Map
+
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
-import qualified Data.Set as Set
-import Data.Swagger
import Data.Text (Text)
-import GHC.Generics (Generic)
-import Servant
-
-import Gargantext.API.Ngrams
+import Gargantext.API.Node.Corpus.Export.Types
+import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Prelude (GargNoServer)
-import Gargantext.Core.Types --
-import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
+import Gargantext.Prelude.Crypto.Hash (hash)
+import Gargantext.Core.Types
import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
-import Gargantext.Database.Admin.Types.Node (Node, NodeId, ListId, CorpusId)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
-import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
+import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
-import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
+import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Prelude
-import Gargantext.Prelude.Utils (sha)
-
-
--- Corpus Export
-data Corpus =
- Corpus { _c_corpus :: [Document]
- , _c_hash :: Hash
- } deriving (Generic)
-
--- | Document Export
-data Document =
- Document { _d_document :: Node HyperdataDocument
- , _d_ngrams :: Ngrams
- , _d_hash :: Hash
- } deriving (Generic)
-
-data Ngrams =
- Ngrams { _ng_ngrams :: [Text]
- , _ng_hash :: Hash
- } deriving (Generic)
-
-type Hash = Text
--------
-instance ToSchema Corpus where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
-
-instance ToSchema Document where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
-
-instance ToSchema Ngrams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
-
--------
-instance ToParamSchema Corpus where
- toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-
-instance ToParamSchema Document where
- toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-
-instance ToParamSchema Ngrams where
- toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
---------------------------------------------------
-type API = Summary "Corpus Export"
- :> "export"
- :> QueryParam "listId" ListId
- :> QueryParam "ngramsType" NgramsType
- :> Get '[JSON] Corpus
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Set as Set
--------------------------------------------------
+-- | Hashes are computed over 'Set' contents, so the input order is fixed by the Set.
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is hash computed already for each document imported in database
- r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
+ r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)
) ns ngs
where
- ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
- d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
- <> (ng_hash b)
-
- pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
- $ List.map _d_hash $ Map.elems r
+ d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
+ , hash b
+ ]
+ pure $ Corpus (Map.elems r) (hash $ List.map _d_hash
+ $ Map.elems r
)
getNodeNgrams :: HasNodeError err
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
pure r
-
-$(deriveJSON (unPrefix "_c_") ''Corpus)
-$(deriveJSON (unPrefix "_d_") ''Document)
-$(deriveJSON (unPrefix "_ng_") ''Ngrams)
-
-
-- TODO
-- Exports List
--- Version number of the list
-
-
+-- Version number of the list
\ No newline at end of file