2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Corpus export API (documents with their ngrams and content hashes)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE TypeOperators #-}
19 module Gargantext.API.Node.Corpus.Export
22 import Data.Aeson.TH (deriveJSON)
24 import Data.Maybe (fromMaybe)
27 import Data.Text (Text)
28 import GHC.Generics (Generic)
29 import Gargantext.API.Ngrams
30 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
31 import Gargantext.API.Prelude (GargNoServer)
32 import Gargantext.Core.Types --
33 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
34 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
35 import Gargantext.Database.Query.Table.Node
36 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
37 import Gargantext.Database.Admin.Config (userMaster)
38 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
39 import Gargantext.Database.Admin.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
40 import Gargantext.Database.Prelude (Cmd)
41 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
42 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
43 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
44 import Gargantext.Prelude
45 import Gargantext.Prelude.Utils (sha)
47 import qualified Data.List as List
48 import qualified Data.Map as Map
49 import qualified Data.Set as Set
54 Corpus { _c_corpus :: [Document] -- ^ Exported documents; 'getCorpus' fills this with the elements of its document map.
60 Document { _d_document :: Node HyperdataDocument -- ^ The document node together with its 'HyperdataDocument' payload.
66 Ngrams { _ng_ngrams :: [Text] -- ^ Ngrams of one document; 'getCorpus' builds this list from a 'Set' via 'Set.toList'.
-- | Swagger schema for 'Corpus', derived generically with the options
-- produced by @unPrefixSwagger "_c_"@ (presumably dropping the @_c_@ field
-- prefix from the property names; confirm against 'unPrefixSwagger').
72 instance ToSchema Corpus where
73 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
-- | Swagger schema for 'Document', derived generically with the options
-- produced by @unPrefixSwagger "_d_"@ (presumably dropping the @_d_@ field
-- prefix from the property names; confirm against 'unPrefixSwagger').
75 instance ToSchema Document where
76 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
-- | Swagger schema for 'Ngrams', derived generically with the options
-- produced by @unPrefixSwagger "_ng_"@ (presumably dropping the @_ng_@ field
-- prefix from the property names; confirm against 'unPrefixSwagger').
78 instance ToSchema Ngrams where
79 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
-- | Parameter schema for 'Corpus'. The proxy argument is ignored and the
-- schema of the @TODO@ type is returned instead, so this does not describe
-- 'Corpus' itself.
-- NOTE(review): @TODO@ is defined outside this excerpt; this looks like a
-- placeholder instance; confirm whether a real schema is needed.
82 instance ToParamSchema Corpus where
83 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-- | Parameter schema for 'Document'. The proxy argument is ignored and the
-- schema of the @TODO@ type is returned instead, so this does not describe
-- 'Document' itself.
-- NOTE(review): @TODO@ is defined outside this excerpt; this looks like a
-- placeholder instance; confirm whether a real schema is needed.
85 instance ToParamSchema Document where
86 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-- | Parameter schema for 'Ngrams'. The proxy argument is ignored and the
-- schema of the @TODO@ type is returned instead, so this does not describe
-- 'Ngrams' itself.
-- NOTE(review): @TODO@ is defined outside this excerpt; this looks like a
-- placeholder instance; confirm whether a real schema is needed.
88 instance ToParamSchema Ngrams where
89 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
90 --------------------------------------------------
-- | Route type of the corpus export endpoint, summarised as "Corpus Export".
-- It takes two query parameters, @listId@ ('ListId') and @ngramsType@
-- ('NgramsType').
-- NOTE(review): the remaining route segments (path and response type) are
-- not visible in this excerpt.
91 type API = Summary "Corpus Export"
93 :> QueryParam "listId" ListId
94 :> QueryParam "ngramsType" NgramsType
97 --------------------------------------------------
101 -> GargNoServer Corpus
-- Builds the exported 'Corpus' for corpus @cId@: document nodes are combined
-- with their ngrams, and each level (ngrams, document, corpus) gets a hash
-- computed with 'sha'.
-- NOTE(review): several lines of this definition are not visible in this
-- excerpt; the comments below only describe the lines that are shown.
102 getCorpus cId lId nt' = do
-- Fallback to 'NgramsTerms' (presumably the alternative taken when no
-- ngrams type was supplied in @nt'@; confirm against the hidden scrutinee).
106 Nothing -> NgramsTerms
-- Pair every document node returned by 'selectDocNodes' with its node id
-- (presumably to build a map keyed by node id; the consumer is not visible).
110 <$> map (\n -> (_node_id n, n))
111 <$> selectDocNodes cId
-- Ngrams per document node for the selected list and ngrams type.
113 ngs <- getNodeNgrams cId lId nt repo
114 let -- uniqId is hash computed already for each document imported in database
-- 'Map.intersectionWith' keeps only keys present in both maps, so only
-- documents that also have an ngrams entry become a 'Document'.
115 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
-- Hash of a document's ngrams: 'sha' of the sorted ngrams concatenated.
118 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
-- Hash of a document, starting from its uniqId ("" when absent).
-- NOTE(review): the expression continues on lines not visible here.
119 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
-- Corpus hash: 'sha' of the document hashes concatenated in 'Map.elems' order.
122 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
123 $ List.map _d_hash $ Map.elems r
-- | Collects, for corpus @cId@, the ngrams attached to each node, returned
-- as a map from 'NodeId' to a set of ngram texts.
-- NOTE(review): parts of the signature and of the body are not visible in
-- this excerpt; the comments below only describe the lines that are shown.
126 getNodeNgrams :: HasNodeError err
131 -> Cmd err (Map NodeId (Set Text))
132 getNodeNgrams cId lId' nt repo = do
-- Fallback to the corpus default list (presumably when @lId'@ is 'Nothing';
-- confirm against the hidden scrutinee).
134 Nothing -> defaultList cId
-- 'NodeList' nodes selected for the 'userMaster' user name.
137 lIds <- selectNodesWithUsername NodeList userMaster
-- Keep only the 'GraphTerm' entries of the term/root mapping of list @lId@.
138 let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
-- Query ngrams by node over the master lists plus @lId@, restricted to the
-- keys of @ngs@.
139 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
-- Template Haskell splices generating the JSON instances of the export
-- types with 'deriveJSON'. Each call passes the options built by 'unPrefix'
-- for the type's field prefix (presumably stripping that prefix from the
-- JSON keys; confirm against 'unPrefix').
143 $(deriveJSON (unPrefix "_c_") ''Corpus)
144 $(deriveJSON (unPrefix "_d_") ''Document)
145 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
150 -- Version number of the list