2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 module Gargantext.API.Node.Corpus.Export
20 import Data.Maybe (fromMaybe)
22 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import qualified Data.HashMap.Strict as HashMap
27 import Gargantext.API.Node.Corpus.Export.Types
28 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
29 import Gargantext.API.Ngrams.Types
30 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
31 import Gargantext.API.Prelude (GargNoServer)
32 import Gargantext.Prelude.Crypto.Hash (hash)
33 import Gargantext.Core.Types
34 import Gargantext.Core.NodeStory
35 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
36 import Gargantext.Database.Admin.Config (userMaster)
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
38 import Gargantext.Database.Prelude (Cmd)
39 import Gargantext.Database.Query.Table.Node
40 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
41 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
42 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
45 import Gargantext.Prelude
47 --------------------------------------------------
48 -- | Hashes are ordered by Set
-- Assembles the exported 'Corpus' for corpus @cId@: documents and their ngrams
-- are joined by key and every joined pair becomes one
-- 'DocumentExport.Document'.
-- NOTE(review): parts of this definition are not visible in this excerpt (most
-- of the type signature, the scrutinees of the two @Nothing@ fallbacks, the
-- binding that receives the document pairs, and the tail of @d_hash@); the
-- comments below describe the visible lines only.
-- NOTE(review): "ordered by Set" presumably means the ngrams are hashed from a
-- 'Set', so the hash input has a canonical order (to be confirmed).
52 -> GargNoServer Corpus
53 getCorpus cId lId nt' = do
-- Fallback branch: 'NgramsTerms' is the value used when the optional input is
-- absent (presumably @nt'@; confirm against the hidden scrutinee).
57 Nothing -> NgramsTerms
-- Fallback branch: 'defaultList' is run on the corpus id when the optional
-- input is absent (presumably @lId@; confirm against the hidden scrutinee).
61 Nothing -> defaultList cId
-- Pair every node returned by 'selectDocNodes' with its node id.
-- NOTE(review): presumably this feeds the map @ns@ used below; the binding
-- line is not visible here.
65 <$> map (\n -> (_node_id n, n))
66 <$> selectDocNodes cId
-- Fetch the repo through getRepo' for the single list @listId@.
68 repo <- getRepo' [listId]
-- Ngrams per node for this corpus, list and ngrams type.
69 ngs <- getNodeNgrams cId listId nt repo
70 let -- uniqId is a hash that was already computed for each document when it was imported into the database
-- 'Map.intersectionWith' keeps only the keys present in BOTH maps, so an entry
-- of @ns@ without a matching entry in @ngs@ (or the reverse) is left out of the
-- export.
71 r = Map.intersectionWith
72 (\a b -> DocumentExport.Document { _d_document = a
-- The ngrams are exported as a list together with the hash of the whole set.
73 , _d_ngrams = DocumentExport.Ngrams (Set.toList b) (hash b)
74 , _d_hash = d_hash a b }
-- The ngrams sets are unwrapped with 'unNgramsTerm' before the join.
75 ) ns (Map.map (Set.map unNgramsTerm) ngs)
-- Per-document hash; a missing uniqId contributes the empty string.
-- NOTE(review): the remaining elements of the hashed list are not visible here.
77 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
-- The corpus hash is the hash of the per-document hashes, taken in the order of
-- 'Map.elems' (ascending key order of @r@).
80 pure $ Corpus { _c_corpus = Map.elems r
81 , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
-- | Runs 'getNgramsByNodeOnlyUser' for corpus @cId@ and ngrams type @nt@ over
-- the 'userMaster' lists plus @lId@, restricted to the terms kept from @repo@
-- for list @lId@. The declared result is a map from 'NodeId' to a set of
-- 'NgramsTerm'.
-- NOTE(review): the argument types of the signature and the end of the body
-- are not visible in this excerpt, so how @r@ is turned into the result cannot
-- be documented from here.
83 getNodeNgrams :: HasNodeError err
88 -> Cmd err (Map NodeId (Set NgramsTerm))
89 getNodeNgrams cId lId nt repo = do
-- Disabled code kept from an earlier version (it referred to an lId' value):
90 -- lId <- case lId' of
91 -- Nothing -> defaultList cId
-- Nodes of type 'NodeList' selected for the 'userMaster' user name.
94 lIds <- selectNodesWithUsername NodeList userMaster
-- Terms of list @lId@ for ngrams type @nt@ taken from @repo@, filtered with
-- 'MapTerm'.
-- NOTE(review): presumably this keeps only map terms together with their root
-- grouping; verify against 'filterListWithRoot' and 'mapTermListRoot'.
95 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- Query over the master lists followed by @lId@, limited to the keys of @ngs@.
97 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
102 -- Version number of the list