2 Module : Gargantext.API.Node.Corpus.Export
Description : Export a corpus as its documents, their ngrams and content hashes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE TypeOperators #-}
19 module Gargantext.API.Node.Corpus.Export
22 import Data.Aeson.TH (deriveJSON)
23 import qualified Data.List as List
24 import qualified Data.Map as Map
26 import Data.Maybe (fromMaybe)
28 import qualified Data.Set as Set
30 import Data.Text (Text)
31 import GHC.Generics (Generic)
34 import Gargantext.API.Ngrams
35 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
36 import Gargantext.API.Prelude (GargNoServer)
37 import Gargantext.Core.Types --
38 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
39 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
40 import Gargantext.Database.Admin.Config (userMaster)
41 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
42 import Gargantext.Database.Admin.Types.Node (Node, NodeId, ListId, CorpusId)
43 import Gargantext.Database.Prelude (Cmd)
44 import Gargantext.Database.Query.Table.Node
45 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
46 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
47 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
48 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
49 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
50 import Gargantext.Prelude
51 import Gargantext.Prelude.Utils (sha)
56 Corpus { _c_corpus :: [Document]
62 Document { _d_document :: Node HyperdataDocument
68 Ngrams { _ng_ngrams :: [Text]
-- | Swagger schema for 'Corpus', derived generically.
-- The schema options come from @unPrefixSwagger "_c_"@
-- (presumably dropping the @_c_@ record-field prefix; confirm in
-- "Gargantext.Core.Utils.Prefix").
instance ToSchema Corpus where
  declareNamedSchema proxy = genericDeclareNamedSchema schemaOptions proxy
    where
      schemaOptions = unPrefixSwagger "_c_"
-- | Swagger schema for 'Document', derived generically.
-- The schema options come from @unPrefixSwagger "_d_"@
-- (presumably dropping the @_d_@ record-field prefix; confirm in
-- "Gargantext.Core.Utils.Prefix").
instance ToSchema Document where
  declareNamedSchema proxy = genericDeclareNamedSchema schemaOptions proxy
    where
      schemaOptions = unPrefixSwagger "_d_"
-- | Swagger schema for 'Ngrams', derived generically.
-- The schema options come from @unPrefixSwagger "_ng_"@
-- (presumably dropping the @_ng_@ record-field prefix; confirm in
-- "Gargantext.Core.Utils.Prefix").
instance ToSchema Ngrams where
  declareNamedSchema proxy = genericDeclareNamedSchema schemaOptions proxy
    where
      schemaOptions = unPrefixSwagger "_ng_"
-- | Parameter schema for 'Corpus'.
-- The incoming proxy is ignored; the schema is the one defined for the
-- @TODO@ type (NOTE(review): looks like a placeholder -- confirm whether a
-- dedicated schema is intended).
instance ToParamSchema Corpus where
  toParamSchema _corpusProxy = toParamSchema todoProxy
    where
      todoProxy = Proxy :: Proxy TODO
-- | Parameter schema for 'Document'.
-- The incoming proxy is ignored; the schema is the one defined for the
-- @TODO@ type (NOTE(review): looks like a placeholder -- confirm whether a
-- dedicated schema is intended).
instance ToParamSchema Document where
  toParamSchema _documentProxy = toParamSchema todoProxy
    where
      todoProxy = Proxy :: Proxy TODO
-- | Parameter schema for 'Ngrams'.
-- The incoming proxy is ignored; the schema is the one defined for the
-- @TODO@ type (NOTE(review): looks like a placeholder -- confirm whether a
-- dedicated schema is intended).
instance ToParamSchema Ngrams where
  toParamSchema _ngramsProxy = toParamSchema todoProxy
    where
      todoProxy = Proxy :: Proxy TODO
92 --------------------------------------------------
93 type API = Summary "Corpus Export"
95 :> QueryParam "listId" ListId
96 :> QueryParam "ngramsType" NgramsType
99 --------------------------------------------------
100 getCorpus :: CorpusId
103 -> GargNoServer Corpus
104 getCorpus cId lId nt' = do
108 Nothing -> NgramsTerms
112 <$> map (\n -> (_node_id n, n))
113 <$> selectDocNodes cId
115 ngs <- getNodeNgrams cId lId nt repo
116 let -- uniqId is hash computed already for each document imported in database
117 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
120 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
121 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
124 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
125 $ List.map _d_hash $ Map.elems r
128 getNodeNgrams :: HasNodeError err
133 -> Cmd err (Map NodeId (Set Text))
134 getNodeNgrams cId lId' nt repo = do
136 Nothing -> defaultList cId
139 lIds <- selectNodesWithUsername NodeList userMaster
140 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
141 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
-- JSON instances for the export types, generated with Template Haskell.
-- Each splice passes the options built by 'unPrefix' for the type's
-- record-field prefix. Written as bare top-level declaration splices,
-- which GHC treats the same as the @$(...)@ form.
deriveJSON (unPrefix "_c_") ''Corpus
deriveJSON (unPrefix "_d_") ''Document
deriveJSON (unPrefix "_ng_") ''Ngrams
152 -- Version number of the list