]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[upload zip] some more work on zipfile parsing
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Export a corpus with its documents and ngrams
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import qualified Data.HashMap.Strict as HashMap
26
27 import Gargantext.API.Node.Corpus.Export.Types
28 import Gargantext.API.Ngrams.Types
29 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
30 import Gargantext.API.Prelude (GargNoServer)
31 import Gargantext.Prelude.Crypto.Hash (hash)
32 import Gargantext.Core.Types
33 import Gargantext.Core.NodeStory
34 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
35 import Gargantext.Database.Admin.Config (userMaster)
36 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
37 import Gargantext.Database.Prelude (Cmd)
38 import Gargantext.Database.Query.Table.Node
39 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
40 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
41 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
42 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
43 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
44 import Gargantext.Prelude
45
46 --------------------------------------------------
-- | Export a corpus: every document node of the corpus that has ngrams is
-- paired with those ngrams, and hashes are attached at the ngrams, document
-- and corpus levels.
--
-- Hashes are ordered by Set: the ngrams of a document are hashed from a
-- 'Set', and documents are listed in 'Map.elems' order (ascending node id).
--
-- Arguments:
--
-- * @cId@: the corpus to export.
-- * @lId@: the list to read ngrams from; when 'Nothing', the list returned
--   by @defaultList cId@ is used.
-- * @nt'@: the type of ngrams to export; defaults to 'NgramsTerms'.
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let nt = fromMaybe NgramsTerms nt'

  -- Resolve the list once, so that the repo lookup and the ngrams query use
  -- the same list and a missing list id no longer ends in a panic.
  listId <- case lId of
    Nothing -> defaultList cId
    Just l  -> pure l

  docNodes <- selectDocNodes cId
  let ns = Map.fromList [ (_node_id n, n) | n <- docNodes ]

  repo <- getRepo' [listId]
  ngs  <- getNodeNgrams cId (Just listId) nt repo

  let -- uniqId is hash computed already for each document imported in database
      docHash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
                         , hash b
                         ]
      toDocument a b = Document { _d_document = a
                                , _d_ngrams   = Ngrams (Set.toList b) (hash b)
                                , _d_hash     = docHash a b }
      -- Only nodes present in both maps (document and ngrams) are exported.
      r    = Map.intersectionWith toDocument ns (Map.map (Set.map unNgramsTerm) ngs)
      docs = Map.elems r

  pure $ Corpus { _c_corpus = docs
                , _c_hash   = hash $ List.map _d_hash docs }
77
-- | Fetch the ngrams of type @nt@ attached to the nodes of corpus @cId@, as a
-- map from node id to the set of its terms.
--
-- The terms looked up are the keys left after applying
-- @filterListWithRoot MapTerm@ to @mapTermListRoot [lId] nt repo@.
-- When @lId'@ is 'Nothing', the list returned by @defaultList cId@ is used.
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> Maybe ListId
              -> NgramsType
              -> NodeListStory
              -> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId' nt repo = do
  -- Use the given list when there is one, the corpus default list otherwise.
  listId <- fromMaybe (defaultList cId) (pure <$> lId')

  -- List nodes selected for 'userMaster'; they are queried in addition to
  -- the resolved list.
  masterListIds <- selectNodesWithUsername NodeList userMaster

  let rootedTerms  = mapTermListRoot [listId] nt repo
      mapTerms     = filterListWithRoot MapTerm rootedTerms
      queriedLists = masterListIds <> [listId]

  -- TODO HashMap
  getNgramsByNodeOnlyUser cId queriedLists nt (HashMap.keys mapTerms)
94
95 -- TODO
96 -- Exports List
97 -- Version number of the list