Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[WIP] backup during the vacations
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 module Gargantext.API.Node.Corpus.Export
17 where
18
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Set (Set)
22 import qualified Data.List as List
23 import qualified Data.Map as Map
24 import qualified Data.Set as Set
25 import qualified Data.HashMap.Strict as HashMap
26
27 import Gargantext.API.Node.Corpus.Export.Types
28 import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
29 import Gargantext.API.Ngrams.Types
30 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
31 import Gargantext.API.Prelude (GargNoServer)
32 import Gargantext.Prelude.Crypto.Hash (hash)
33 import Gargantext.Core.Types
34 import Gargantext.Core.NodeStory
35 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
36 import Gargantext.Database.Admin.Config (userMaster)
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
38 import Gargantext.Database.Prelude (Cmd)
39 import Gargantext.Database.Query.Table.Node
40 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
41 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
42 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
45 import Gargantext.Prelude
46
47 --------------------------------------------------
--------------------------------------------------
-- | Build the export of a corpus: one exported document per corpus document
-- that also has ngrams, plus a hash over all the document hashes.
--
-- * When no 'ListId' is given, the list returned by @defaultList cId@ is used.
-- * When no 'NgramsType' is given, 'NgramsTerms' is used.
--
-- Hashes are ordered by Set: the ngrams of a document are hashed as a 'Set',
-- and the corpus hash is computed over the document hashes in the order of
-- 'Map.elems' (ascending node id).
getCorpus :: CorpusId
          -> Maybe ListId
          -> Maybe NgramsType
          -> GargNoServer Corpus
getCorpus cId lId nt' = do
  let nt = fromMaybe NgramsTerms nt'

  -- Use the caller's list when provided, otherwise look up the default one.
  listId <- fromMaybe (defaultList cId) (pure <$> lId)

  -- Index the corpus documents by their node id.
  docNodes <- selectDocNodes cId
  let docsById = Map.fromList [ (_node_id n, n) | n <- docNodes ]

  repo <- getRepo' [listId]
  ngs  <- getNodeNgrams cId listId nt repo

  let termsByDoc = Map.map (Set.map unNgramsTerm) ngs

      -- uniqId is hash computed already for each document imported in database
      toDocument doc terms =
        DocumentExport.Document
          { _d_document = doc
          , _d_ngrams   = DocumentExport.Ngrams (Set.toList terms) (hash terms)
          , _d_hash     = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata doc)
                               , hash terms
                               ]
          }

      -- Only node ids present in both maps are exported.
      documents = Map.elems (Map.intersectionWith toDocument docsById termsByDoc)

  pure $ Corpus { _c_corpus = documents
                , _c_hash   = hash $ List.map DocumentExport._d_hash documents
                }
82
-- | Ngrams of a corpus, grouped by node id.
--
-- The terms to look for are read from @repo@ for the list @lId@ and the
-- ngrams type @nt@ (via 'mapTermListRoot'), then filtered with
-- @'filterListWithRoot' MapTerm@. They are then looked up with
-- 'getNgramsByNodeOnlyUser' against the 'NodeList' nodes selected for
-- 'userMaster', followed by @lId@ itself.
getNodeNgrams :: HasNodeError err
              => CorpusId
              -> ListId
              -> NgramsType
              -> NodeListStory
              -> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId nt repo = do
  lIds <- selectNodesWithUsername NodeList userMaster
  let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
  -- TODO HashMap
  -- The query is the last action of the block, so its result is returned
  -- directly rather than bound and re-wrapped with 'pure'.
  getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
99
100 -- TODO
101 -- Exports List
102 -- Version number of the list