2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 module Gargantext.API.Node.Corpus.Export
20 import Data.Maybe (fromMaybe)
22 import Data.Text (Text)
23 import qualified Data.List as List
24 import qualified Data.Map as Map
25 import qualified Data.Set as Set
26 import qualified Data.HashMap.Strict as HashMap
28 import Gargantext.API.Node.Corpus.Export.Types
29 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
30 import Gargantext.API.Ngrams.Types
31 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
32 import Gargantext.API.Prelude (GargNoServer)
33 import Gargantext.Prelude.Crypto.Hash (hash)
34 import Gargantext.Core.Types
35 import Gargantext.Core.NodeStory
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
39 import Gargantext.Database.Prelude (Cmd)
40 import Gargantext.Database.Query.Table.Node
41 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
42 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
44 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
45 import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
46 import Gargantext.Prelude
48 --------------------------------------------------
49 -- | Hashes are computed over Sets and Map elements, so their input order is deterministic
53 -> GargNoServer Corpus
-- Assemble the exported 'Corpus' for corpus @cId@: one exported document per
-- context that has ngrams, plus hashes derived from them.
-- NOTE(review): several lines of this definition are not visible in this
-- excerpt; the comments below only describe what the visible lines show.
54 getCorpus cId lId nt' = do
-- Fallback to 'NgramsTerms' (presumably when @nt'@ is 'Nothing'; the case
-- scrutinee is not visible here -- TODO confirm).
58 Nothing -> NgramsTerms
-- Fallback to @defaultList cId@ (presumably when @lId@ is 'Nothing'; the case
-- scrutinee is not visible here -- TODO confirm).
62 Nothing -> defaultList cId
-- Pair every document context of the corpus with its context id.
-- NOTE(review): presumably turned into the map @ns@ used below; the binding
-- itself is not visible -- TODO confirm.
66 <$> map (\n -> (_context_id n, n))
67 <$> selectDocNodes cId
-- Load the repo for the selected list, then the ngrams of each context that
-- are in the 'MapTerm' list.
69 repo <- getRepo' [listId]
70 ngs <- getContextNgrams cId listId MapTerm nt repo
71 let -- uniqId is the hash already computed for each document imported into the database
-- 'Map.intersectionWith' keeps only the keys present in both @ns@ and @ngs@,
-- so a context without an entry in @ngs@ yields no exported document.
72 r = Map.intersectionWith
73 (\a b -> DocumentExport.Document { _d_document = context2node a
-- The ngrams hash is taken over the 'Set' itself, not over the list.
74 , _d_ngrams = DocumentExport.Ngrams (Set.toList b) (hash b)
75 , _d_hash = d_hash a b }
76 ) ns (Map.map (Set.map unNgramsTerm) ngs)
-- Per-document hash: hashes a list whose first element is the document's
-- uniqId, or "" when it has none.
-- NOTE(review): the remaining list elements are not visible here; presumably
-- they involve the ngrams set @b@ -- TODO confirm.
78 d_hash :: Context HyperdataDocument -> Set Text -> Text
79 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
-- The corpus hash is the hash of the list of document hashes, taken in the
-- order of 'Map.elems' (ascending key order of @r@).
82 pure $ Corpus { _c_corpus = Map.elems r
83 , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
-- NOTE(review): the argument types of the signature and the end of the body
-- are not visible in this excerpt; comments describe the visible lines only.

-- | Look up the ngrams attached to the contexts of corpus @cId@, restricted to
-- the terms of list @lId@ that have the given @listType@.
-- The result type maps each 'ContextId' to a 'Set' of 'NgramsTerm'.
85 getContextNgrams :: HasNodeError err
91 -> Cmd err (Map ContextId (Set NgramsTerm))
92 getContextNgrams cId lId listType nt repo = do
93 -- lId <- case lId' of
94 -- Nothing -> defaultList cId
-- Presumably the list nodes ('NodeList') owned by the master user
-- ('userMaster') -- TODO confirm against 'selectNodesWithUsername'.
97 lIds <- selectNodesWithUsername NodeList userMaster
-- Terms of list @lId@ for ngrams type @nt@ taken from @repo@, filtered by
-- @listType@; only the keys of this map are used below.
98 let ngs = filterListWithRoot listType $ mapTermListRoot [lId] nt repo
-- Query over the master lists plus @lId@, limited to the selected terms.
100 r <- getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
105 -- Version number of the list