]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Corpus/Export.hs
[DB/FACT] Gargantext.Database.Prelude
[gargantext.git] / src / Gargantext / API / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 {-# LANGUAGE DataKinds #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE FlexibleContexts #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE RankNTypes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23 {-# LANGUAGE TypeOperators #-}
24
25 module Gargantext.API.Corpus.Export
26 where
27
28 import Data.Aeson.TH (deriveJSON)
29 import Data.Map (Map)
30 import Data.Maybe (fromMaybe)
31 import Data.Set (Set)
32 import Data.Swagger
33 import Data.Text (Text)
34 import GHC.Generics (Generic)
35 import Gargantext.API.Ngrams
36 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
37 import Gargantext.API.Admin.Types (GargNoServer)
38 import Gargantext.Core.Types --
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Admin.Config (userMaster)
44 import Gargantext.Database.Admin.Types.Errors (HasNodeError)
45 import Gargantext.Database.Admin.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
46 import Gargantext.Database.Prelude (Cmd)
47 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
48 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
49 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
50 import Gargantext.Prelude
51 import Gargantext.Prelude.Utils (sha)
52 import Servant
53 import qualified Data.List as List
54 import qualified Data.Map as Map
55 import qualified Data.Set as Set
56
57
58 -- Corpus Export
59 data Corpus =
60 Corpus { _c_corpus :: [Document]
61 , _c_hash :: Hash
62 } deriving (Generic)
63
64 -- | Document Export
65 data Document =
66 Document { _d_document :: Node HyperdataDocument
67 , _d_ngrams :: Ngrams
68 , _d_hash :: Hash
69 } deriving (Generic)
70
71 data Ngrams =
72 Ngrams { _ng_ngrams :: [Text]
73 , _ng_hash :: Hash
74 } deriving (Generic)
75
76 type Hash = Text
77 -------
78 instance ToSchema Corpus where
79 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
80
81 instance ToSchema Document where
82 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
83
84 instance ToSchema Ngrams where
85 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
86
87 -------
88 instance ToParamSchema Corpus where
89 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
90
91 instance ToParamSchema Document where
92 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
93
94 instance ToParamSchema Ngrams where
95 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
96 --------------------------------------------------
97 type API = Summary "Corpus Export"
98 :> "export"
99 :> QueryParam "listId" ListId
100 :> QueryParam "ngramsType" NgramsType
101 :> Get '[JSON] Corpus
102
103 --------------------------------------------------
104 getCorpus :: CorpusId
105 -> Maybe ListId
106 -> Maybe NgramsType
107 -> GargNoServer Corpus
108 getCorpus cId lId nt' = do
109
110 let
111 nt = case nt' of
112 Nothing -> NgramsTerms
113 Just t -> t
114
115 ns <- Map.fromList
116 <$> map (\n -> (_node_id n, n))
117 <$> selectDocNodes cId
118 repo <- getRepo
119 ngs <- getNodeNgrams cId lId nt repo
120 let -- uniqId is hash computed already for each document imported in database
121 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
122 ) ns ngs
123 where
124 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
125 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
126 <> (ng_hash b)
127
128 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
129 $ List.map _d_hash $ Map.elems r
130 )
131
132 getNodeNgrams :: HasNodeError err
133 => CorpusId
134 -> Maybe ListId
135 -> NgramsType
136 -> NgramsRepo
137 -> Cmd err (Map NodeId (Set Text))
138 getNodeNgrams cId lId' nt repo = do
139 lId <- case lId' of
140 Nothing -> defaultList cId
141 Just l -> pure l
142
143 lIds <- selectNodesWithUsername NodeList userMaster
144 let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
145 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
146 pure r
147
148
149 $(deriveJSON (unPrefix "_c_") ''Corpus)
150 $(deriveJSON (unPrefix "_d_") ''Document)
151 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
152
153
154 -- TODO
155 -- Exports List
156 -- Version number of the list
157
158