]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[DB] Trigger for sha256sum (WIP)
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE TypeOperators #-}
18
19 module Gargantext.API.Node.Corpus.Export
20 where
21
22
23 import Data.Aeson.TH (deriveJSON)
24 import Data.Map (Map)
25 import Data.Maybe (fromMaybe)
26 import Data.Set (Set)
27 import Data.Swagger
28 import Data.Text (Text)
29 import GHC.Generics (Generic)
30 import Gargantext.API.Ngrams
31 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
32 import Gargantext.API.Prelude (GargNoServer)
33 import Gargantext.Prelude.Crypto.Hash (hash)
34 import Gargantext.Core.Types
35 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
36 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
39 import Gargantext.Database.Admin.Types.Node (Node, NodeId, ListId, CorpusId)
40 import Gargantext.Database.Prelude (Cmd)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
43 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
44 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
47 import Gargantext.Prelude
48 import Servant
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Set as Set
52
53
54 -- Corpus Export
55 data Corpus =
56 Corpus { _c_corpus :: [Document]
57 , _c_hash :: Hash
58 } deriving (Generic)
59
60 -- | Document Export
61 data Document =
62 Document { _d_document :: Node HyperdataDocument
63 , _d_ngrams :: Ngrams
64 , _d_hash :: Hash
65 } deriving (Generic)
66
67 data Ngrams =
68 Ngrams { _ng_ngrams :: [Text]
69 , _ng_hash :: Hash
70 } deriving (Generic)
71
72 type Hash = Text
73 -------
74 instance ToSchema Corpus where
75 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
76
77 instance ToSchema Document where
78 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
79
80 instance ToSchema Ngrams where
81 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
82
83 -------
84 instance ToParamSchema Corpus where
85 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
86
87 instance ToParamSchema Document where
88 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
89
90 instance ToParamSchema Ngrams where
91 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
92 --------------------------------------------------
93 type API = Summary "Corpus Export"
94 :> "export"
95 :> QueryParam "listId" ListId
96 :> QueryParam "ngramsType" NgramsType
97 :> Get '[JSON] Corpus
98
99 --------------------------------------------------
100 -- | Hashes are ordered by Set
101 getCorpus :: CorpusId
102 -> Maybe ListId
103 -> Maybe NgramsType
104 -> GargNoServer Corpus
105 getCorpus cId lId nt' = do
106
107 let
108 nt = case nt' of
109 Nothing -> NgramsTerms
110 Just t -> t
111
112 ns <- Map.fromList
113 <$> map (\n -> (_node_id n, n))
114 <$> selectDocNodes cId
115 repo <- getRepo
116 ngs <- getNodeNgrams cId lId nt repo
117 let -- uniqId is hash computed already for each document imported in database
118 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)
119 ) ns ngs
120 where
121 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
122 , hash b
123 ]
124 pure $ Corpus (Map.elems r) (hash $ List.map _d_hash
125 $ Map.elems r
126 )
127
128 getNodeNgrams :: HasNodeError err
129 => CorpusId
130 -> Maybe ListId
131 -> NgramsType
132 -> NgramsRepo
133 -> Cmd err (Map NodeId (Set Text))
134 getNodeNgrams cId lId' nt repo = do
135 lId <- case lId' of
136 Nothing -> defaultList cId
137 Just l -> pure l
138
139 lIds <- selectNodesWithUsername NodeList userMaster
140 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
141 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
142 pure r
143
144
145 $(deriveJSON (unPrefix "_c_") ''Corpus)
146 $(deriveJSON (unPrefix "_d_") ''Document)
147 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
148
149
150 -- TODO
151 -- Exports List
152 -- Version number of the list
153
154