Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[conduit] some work towards migrating file parser to conduit (does not compile)
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import Data.Text (Text)
23 import qualified Data.List as List
24 import qualified Data.Map as Map
25 import qualified Data.Set as Set
26 import qualified Data.HashMap.Strict as HashMap
27
28 import Gargantext.API.Node.Corpus.Export.Types
29 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
30 import Gargantext.API.Ngrams.Types
31 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
32 import Gargantext.API.Prelude (GargNoServer)
33 import Gargantext.Prelude.Crypto.Hash (hash)
34 import Gargantext.Core.Types
35 import Gargantext.Core.NodeStory
36 import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
39 import Gargantext.Database.Prelude (Cmd)
40 import Gargantext.Database.Query.Table.Node
41 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
42 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
44 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
45 import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
46 import Gargantext.Prelude
47
48 --------------------------------------------------
-- | Build the export of a corpus.
--
-- The ngrams type falls back to 'NgramsTerms' when none is given, and the
-- list falls back to @defaultList cId@ when no list id is given.
--
-- Documents and ngrams are joined on the context id with
-- 'Map.intersectionWith', so only contexts present on both sides are
-- exported. Results are taken with 'Map.elems', i.e. in ascending key order,
-- and each ngrams hash is computed over a 'Set' (hashes are ordered by Set).
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let ngramsType = fromMaybe NgramsTerms nt'

  listId <- case lId of
    Just l  -> pure l
    Nothing -> defaultList cId

  -- Index the document contexts of the corpus by their context id.
  docContexts <- selectDocNodes cId
  let contextsById = Map.fromList [ (_context_id c, c) | c <- docContexts ]

  repo <- getRepo' [listId]
  ngs  <- getContextNgrams cId listId MapTerm ngramsType repo

  let termsById = Map.map (Set.map unNgramsTerm) ngs
      documents = Map.elems
                $ Map.intersectionWith toDocument contextsById termsById

  pure $ Corpus { _c_corpus = documents
                , _c_hash   = hash $ List.map DocumentExport._d_hash documents }
  where
    -- Pair one document context with its set of ngrams terms.
    toDocument ctx terms =
      DocumentExport.Document
        { _d_document = context2node ctx
        , _d_ngrams   = DocumentExport.Ngrams (Set.toList terms) (hash terms)
        , _d_hash     = documentHash ctx terms
        }

    -- uniqId is hash computed already for each document imported in database;
    -- a missing uniqId contributes the empty text.
    documentHash :: Context HyperdataDocument -> Set Text -> Text
    documentHash ctx terms =
      hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata ctx)
           , hash terms
           ]
84
-- | Fetch, for each context of a corpus, the set of ngrams attached to it.
--
-- The candidate ngrams are the keys of
-- @filterListWithRoot listType (mapTermListRoot [lId] nt repo)@.
-- The lookup is run with 'getNgramsByContextOnlyUser' over the 'NodeList'
-- nodes selected for 'userMaster', followed by the given list id.
getContextNgrams :: HasNodeError err
                 => CorpusId
                 -> ListId
                 -> ListType
                 -> NgramsType
                 -> NodeListStory
                 -> Cmd err (Map ContextId (Set NgramsTerm))
getContextNgrams cId lId listType nt repo = do
  masterListIds <- selectNodesWithUsername NodeList userMaster
  let selectedTerms = HashMap.keys
                    $ filterListWithRoot listType
                    $ mapTermListRoot [lId] nt repo
  -- TODO HashMap
  getNgramsByContextOnlyUser cId (masterListIds <> [lId]) nt selectedTerms
102
103 -- TODO
104 -- Exports List
105 -- Version number of the list