]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[annuaire] remove duplicated AsyncJobs type
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map.Strict (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import Data.Text (Text, pack)
23 import qualified Data.List as List
24 import qualified Data.Map.Strict as Map
25 import qualified Data.Set as Set
26 import qualified Data.HashMap.Strict as HashMap
27 import Servant (Headers, Header, addHeader)
28
29 import Gargantext.API.Node.Corpus.Export.Types
30 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
31 import Gargantext.API.Ngrams.Types
32 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
33 import Gargantext.API.Prelude (GargNoServer)
34 import Gargantext.Prelude.Crypto.Hash (hash)
35 import Gargantext.Core.Types
36 import Gargantext.Core.NodeStory
37 import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
38 import Gargantext.Database.Admin.Config (userMaster)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
40 import Gargantext.Database.Prelude (Cmd)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
43 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
44 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
47 import Gargantext.Prelude
48
49 --------------------------------------------------
50 -- | Hashes are ordered by Set
51 getCorpus :: CorpusId
52 -> Maybe ListId
53 -> Maybe NgramsType
54 -> GargNoServer (Headers '[Header "Content-Disposition" Text] Corpus)
55 getCorpus cId lId nt' = do
56
57 let
58 nt = case nt' of
59 Nothing -> NgramsTerms
60 Just t -> t
61
62 listId <- case lId of
63 Nothing -> defaultList cId
64 Just l -> pure l
65
66 ns <- Map.fromList
67 <$> map (\n -> (_context_id n, n))
68 <$> selectDocNodes cId
69
70 repo <- getRepo [listId]
71 ngs <- getContextNgrams cId listId MapTerm nt repo
72 let -- uniqId is hash computed already for each document imported in database
73 r = Map.intersectionWith
74 (\a b -> DocumentExport.Document { _d_document = context2node a
75 , _d_ngrams = DocumentExport.Ngrams (Set.toList b) (hash b)
76 , _d_hash = d_hash a b }
77 ) ns (Map.map (Set.map unNgramsTerm) ngs)
78 where
79 d_hash :: Context HyperdataDocument -> Set Text -> Text
80 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
81 , hash b
82 ]
83 pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json")
84 $ Corpus { _c_corpus = Map.elems r
85 , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
86
87 getContextNgrams :: HasNodeError err
88 => CorpusId
89 -> ListId
90 -> ListType
91 -> NgramsType
92 -> NodeListStory
93 -> Cmd err (Map ContextId (Set NgramsTerm))
94 getContextNgrams cId lId listType nt repo = do
95 -- lId <- case lId' of
96 -- Nothing -> defaultList cId
97 -- Just l -> pure l
98
99 lIds <- selectNodesWithUsername NodeList userMaster
100 let ngs = filterListWithRoot [listType] $ mapTermListRoot [lId] nt repo
101 -- TODO HashMap
102 r <- getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
103 pure r
104
105 -- TODO
106 -- Exports List
107 -- Version number of the list