]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
Merge branch 'dev' into dev-doc-table-optimization
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE TypeOperators #-}
18
19 module Gargantext.API.Node.Corpus.Export
20 where
21
22 import Data.Aeson.TH (deriveJSON)
23 import qualified Data.List as List
24 import qualified Data.Map as Map
25 import Data.Map (Map)
26 import Data.Maybe (fromMaybe)
27 import Data.Set (Set)
28 import qualified Data.Set as Set
29 import Data.Swagger
30 import Data.Text (Text)
31 import GHC.Generics (Generic)
32 import Servant
33
34 import Gargantext.API.Ngrams
35 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
36 import Gargantext.API.Prelude (GargNoServer)
37 import Gargantext.Core.Types --
38 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
39 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
40 import Gargantext.Database.Admin.Config (userMaster)
41 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
42 import Gargantext.Database.Admin.Types.Node (Node, NodeId, ListId, CorpusId)
43 import Gargantext.Database.Prelude (Cmd)
44 import Gargantext.Database.Query.Table.Node
45 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
46 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
47 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
48 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
49 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
50 import Gargantext.Prelude
51 import Gargantext.Prelude.Utils (sha)
52
53
54 -- Corpus Export
55 data Corpus =
56 Corpus { _c_corpus :: [Document]
57 , _c_hash :: Hash
58 } deriving (Generic)
59
60 -- | Document Export
61 data Document =
62 Document { _d_document :: Node HyperdataDocument
63 , _d_ngrams :: Ngrams
64 , _d_hash :: Hash
65 } deriving (Generic)
66
67 data Ngrams =
68 Ngrams { _ng_ngrams :: [Text]
69 , _ng_hash :: Hash
70 } deriving (Generic)
71
72 type Hash = Text
73 -------
74 instance ToSchema Corpus where
75 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
76
77 instance ToSchema Document where
78 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
79
80 instance ToSchema Ngrams where
81 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
82
83 -------
84 instance ToParamSchema Corpus where
85 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
86
87 instance ToParamSchema Document where
88 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
89
90 instance ToParamSchema Ngrams where
91 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
92 --------------------------------------------------
93 type API = Summary "Corpus Export"
94 :> "export"
95 :> QueryParam "listId" ListId
96 :> QueryParam "ngramsType" NgramsType
97 :> Get '[JSON] Corpus
98
99 --------------------------------------------------
100 getCorpus :: CorpusId
101 -> Maybe ListId
102 -> Maybe NgramsType
103 -> GargNoServer Corpus
104 getCorpus cId lId nt' = do
105
106 let
107 nt = case nt' of
108 Nothing -> NgramsTerms
109 Just t -> t
110
111 ns <- Map.fromList
112 <$> map (\n -> (_node_id n, n))
113 <$> selectDocNodes cId
114 repo <- getRepo
115 ngs <- getNodeNgrams cId lId nt repo
116 let -- uniqId is hash computed already for each document imported in database
117 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
118 ) ns ngs
119 where
120 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
121 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
122 <> (ng_hash b)
123
124 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
125 $ List.map _d_hash $ Map.elems r
126 )
127
128 getNodeNgrams :: HasNodeError err
129 => CorpusId
130 -> Maybe ListId
131 -> NgramsType
132 -> NgramsRepo
133 -> Cmd err (Map NodeId (Set Text))
134 getNodeNgrams cId lId' nt repo = do
135 lId <- case lId' of
136 Nothing -> defaultList cId
137 Just l -> pure l
138
139 lIds <- selectNodesWithUsername NodeList userMaster
140 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
141 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
142 pure r
143
144
145 $(deriveJSON (unPrefix "_c_") ''Corpus)
146 $(deriveJSON (unPrefix "_d_") ''Document)
147 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
148
149
150 -- TODO
151 -- Exports List
152 -- Version number of the list
153
154