2 Module : Gargantext.API.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 {-# LANGUAGE DataKinds #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE FlexibleContexts #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE RankNTypes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23 {-# LANGUAGE TypeOperators #-}
25 module Gargantext.API.Corpus.Export
28 import Data.Aeson.TH (deriveJSON)
30 import Data.Maybe (fromMaybe)
33 import Data.Text (Text)
34 import GHC.Generics (Generic)
35 import Gargantext.API.Ngrams
36 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
37 import Gargantext.API.Admin.Types (GargNoServer)
38 import Gargantext.Core.Types --
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Admin.Config (userMaster)
44 import Gargantext.Database.Admin.Types.Errors (HasNodeError)
45 import Gargantext.Database.Admin.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
46 import Gargantext.Database.Prelude (Cmd)
47 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
48 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
49 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
50 import Gargantext.Prelude
51 import Gargantext.Prelude.Utils (sha)
53 import qualified Data.List as List
54 import qualified Data.Map as Map
55 import qualified Data.Set as Set
60 Corpus { _c_corpus :: [Document]
66 Document { _d_document :: Node HyperdataDocument
72 Ngrams { _ng_ngrams :: [Text]
78 instance ToSchema Corpus where
79 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
81 instance ToSchema Document where
82 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
84 instance ToSchema Ngrams where
85 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
88 instance ToParamSchema Corpus where
89 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
91 instance ToParamSchema Document where
92 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
94 instance ToParamSchema Ngrams where
95 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
96 --------------------------------------------------
97 type API = Summary "Corpus Export"
99 :> QueryParam "listId" ListId
100 :> QueryParam "ngramsType" NgramsType
101 :> Get '[JSON] Corpus
103 --------------------------------------------------
104 getCorpus :: CorpusId
107 -> GargNoServer Corpus
108 getCorpus cId lId nt' = do
112 Nothing -> NgramsTerms
116 <$> map (\n -> (_node_id n, n))
117 <$> selectDocNodes cId
119 ngs <- getNodeNgrams cId lId nt repo
120 let -- uniqId is hash computed already for each document imported in database
121 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
124 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
125 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
128 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
129 $ List.map _d_hash $ Map.elems r
132 getNodeNgrams :: HasNodeError err
137 -> Cmd err (Map NodeId (Set Text))
138 getNodeNgrams cId lId' nt repo = do
140 Nothing -> defaultList cId
143 lIds <- selectNodesWithUsername NodeList userMaster
144 let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
145 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
149 $(deriveJSON (unPrefix "_c_") ''Corpus)
150 $(deriveJSON (unPrefix "_d_") ''Document)
151 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
156 -- Version number of the list