2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 module Gargantext.API.Node.Corpus.Export
20 import Data.Maybe (fromMaybe)
22 import Data.Text (Text, pack)
23 import qualified Data.List as List
24 import qualified Data.Map as Map
25 import qualified Data.Set as Set
26 import qualified Data.HashMap.Strict as HashMap
27 import Servant (Headers, Header, addHeader)
29 import Gargantext.API.Node.Corpus.Export.Types
30 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
31 import Gargantext.API.Ngrams.Types
32 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
33 import Gargantext.API.Prelude (GargNoServer)
34 import Gargantext.Prelude.Crypto.Hash (hash)
35 import Gargantext.Core.Types
36 import Gargantext.Core.NodeStory
37 import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
38 import Gargantext.Database.Admin.Config (userMaster)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
40 import Gargantext.Database.Prelude (Cmd)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
43 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
44 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
47 import Gargantext.Prelude
49 --------------------------------------------------
50 -- | Hashes are ordered by Set
54 -> GargNoServer (Headers '[Header "Content-Disposition" Text] Corpus)
-- Export the documents of corpus cId together with their ngrams: the result
-- is a Corpus value carrying a Content-Disposition header so that it is served
-- as an attachment named "GarganText_corpus-<cId>.json".
55 getCorpus cId lId nt' = do
-- NOTE(review): presumably the fallback used when nt' is Nothing — confirm.
59 Nothing -> NgramsTerms
-- NOTE(review): presumably the fallback used when lId is Nothing (the corpus'
-- default list is looked up instead) — confirm.
63 Nothing -> defaultList cId
-- Pair every node returned by selectDocNodes with its _context_id.
-- NOTE(review): presumably collected into the Map used as `ns` below — confirm.
67 <$> map (\n -> (_context_id n, n))
68 <$> selectDocNodes cId
-- Load the repo for the selected list, then collect the MapTerm ngrams per context.
70 repo <- getRepo [listId]
71 ngs <- getContextNgrams cId listId MapTerm nt repo
72 let -- uniqId is a hash already computed for each document imported into the database
-- Map.intersectionWith keeps only keys present in both maps, so a context
-- appears in the export only if it has both a document node and ngrams.
73 r = Map.intersectionWith
74 (\a b -> DocumentExport.Document { _d_document = context2node a
75 , _d_ngrams = DocumentExport.Ngrams (Set.toList b) (hash b)
76 , _d_hash = d_hash a b }
77 ) ns (Map.map (Set.map unNgramsTerm) ngs)
-- Per-document hash: its input list starts with the document's uniqId, or ""
-- when the document has none.
79 d_hash :: Context HyperdataDocument -> Set Text -> Text
80 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
-- The corpus hash is computed over the per-document hashes, taken in the
-- order of Map.elems (ascending key order).
83 pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json")
84 $ Corpus { _c_corpus = Map.elems r
85 , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
-- | Collect, for each context of corpus cId, a set of ngrams: the result type
-- is a Map from ContextId to Set NgramsTerm. The terms looked up are those of
-- list lId in repo that pass the listType filter for ngrams type nt.
87 getContextNgrams :: HasNodeError err
93 -> Cmd err (Map ContextId (Set NgramsTerm))
94 getContextNgrams cId lId listType nt repo = do
-- Disabled code: an earlier variant that resolved an optional list id via defaultList.
95 -- lId <- case lId' of
96 -- Nothing -> defaultList cId
-- List nodes selected for the userMaster username; they are queried alongside lId below.
99 lIds <- selectNodesWithUsername NodeList userMaster
-- Restrict the terms of list lId (for ngrams type nt) to the requested listType.
100 let ngs = filterListWithRoot [listType] $ mapTermListRoot [lId] nt repo
-- Query by context, passing the userMaster lists plus lId and only the keys of ngs.
-- NOTE(review): `r` is presumably the value returned by this function — confirm.
102 r <- getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
107 -- Version number of the list