]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[frameUpload] implement CSV v3 upload for corpus
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Export a corpus (documents with their ngrams and hashes)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import qualified Data.HashMap.Strict as HashMap
26
27 import Gargantext.API.Node.Corpus.Export.Types
28 import Gargantext.API.Ngrams.Types
29 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
30 import Gargantext.API.Prelude (GargNoServer)
31 import Gargantext.Prelude.Crypto.Hash (hash)
32 import Gargantext.Core.Types
33 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
34 import Gargantext.Database.Admin.Config (userMaster)
35 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
36 import Gargantext.Database.Prelude (Cmd)
37 import Gargantext.Database.Query.Table.Node
38 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
39 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
40 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
41 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
42 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
43 import Gargantext.Prelude
44
45 --------------------------------------------------
-- | Build the exportable 'Corpus' of a corpus node.
--
-- Every document node of the corpus that also has an entry in the result of
-- 'getNodeNgrams' becomes one 'Document'; nodes present on only one side are
-- left out (map intersection on the node id).
--
-- Hashes are ordered by Set: the ngrams hash is computed on the 'Set' of
-- terms, and the corpus hash on the document hashes taken in ascending
-- node-id order ('Map.elems').
--
-- When no 'NgramsType' is given, 'NgramsTerms' is used.
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  docNodes <- selectDocNodes cId
  repo     <- getRepo
  ngs      <- getNodeNgrams cId lId (fromMaybe NgramsTerms nt') repo

  let
    nodesById = Map.fromList [ (_node_id n, n) | n <- docNodes ]
    termsById = Map.map (Set.map unNgramsTerm) ngs
    documents = Map.elems
              $ Map.intersectionWith mkDocument nodesById termsById

  pure $ Corpus documents (hash $ List.map _d_hash documents)
  where
    -- Pair a document node with its terms and derive both hashes.
    mkDocument node terms =
      Document node (Ngrams (Set.toList terms) termsHash) docHash
      where
        termsHash = hash terms
        -- uniqId is the hash already computed for each document when it was
        -- imported in the database; an absent uniqId contributes "".
        docHash   = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata node)
                         , termsHash
                         ]
73
-- | Fetch, per node, the set of ngrams terms of the given 'NgramsType'.
--
-- The list used is the supplied 'ListId', or the result of 'defaultList' on
-- the corpus when none is given. The candidate terms are the keys kept by
-- 'filterListWithRoot' 'MapTerm' over that list in the repo. The lookup
-- itself is delegated to 'getNgramsByNodeOnlyUser', which receives the list
-- nodes returned by 'selectNodesWithUsername' for 'userMaster' followed by
-- the chosen list.
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> Maybe ListId
              -> NgramsType
              -> NgramsRepo
              -> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId' nt repo = do
  -- Fall back to the corpus default list only when no list id was supplied.
  lId        <- fromMaybe (defaultList cId) (pure <$> lId')
  masterLids <- selectNodesWithUsername NodeList userMaster

  let mapTerms = HashMap.keys
               . filterListWithRoot MapTerm
               $ mapTermListRoot [lId] nt repo

  -- TODO HashMap
  getNgramsByNodeOnlyUser cId (masterLids <> [lId]) nt mapTerms
90
91 -- TODO
92 -- Exports List
93 -- Version number of the list