2 Module : Gargantext.API.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 {-# LANGUAGE DataKinds #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE FlexibleContexts #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE RankNTypes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23 {-# LANGUAGE TypeOperators #-}
25 module Gargantext.API.Corpus.Export
28 import Data.Aeson.TH (deriveJSON)
30 import Data.Maybe (fromMaybe)
33 import Data.Text (Text)
34 import GHC.Generics (Generic)
35 import Gargantext.API.Ngrams
36 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
37 import Gargantext.API.Admin.Types (GargNoServer)
38 import Gargantext.Core.Types --
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
41 import Gargantext.Database.Action.Query.Node
42 import Gargantext.Database.Action.Query.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Admin.Config (userMaster)
44 import Gargantext.Database.Admin.Types.Errors (HasNodeError)
45 import Gargantext.Database.Admin.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
46 import Gargantext.Database.Admin.Utils (Cmd)
47 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
48 import Gargantext.Database.Schema.NodeNode (selectDocNodes)
49 import Gargantext.Prelude
50 import Gargantext.Prelude.Utils (sha)
52 import qualified Data.List as List
53 import qualified Data.Map as Map
54 import qualified Data.Set as Set
59 Corpus { _c_corpus :: [Document]
65 Document { _d_document :: Node HyperdataDocument
71 Ngrams { _ng_ngrams :: [Text]
77 instance ToSchema Corpus where
78 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
80 instance ToSchema Document where
81 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
83 instance ToSchema Ngrams where
84 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
87 instance ToParamSchema Corpus where
88 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
90 instance ToParamSchema Document where
91 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
93 instance ToParamSchema Ngrams where
94 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
95 --------------------------------------------------
96 type API = Summary "Corpus Export"
98 :> QueryParam "listId" ListId
99 :> QueryParam "ngramsType" NgramsType
100 :> Get '[JSON] Corpus
102 --------------------------------------------------
103 getCorpus :: CorpusId
106 -> GargNoServer Corpus
107 getCorpus cId lId nt' = do
111 Nothing -> NgramsTerms
115 <$> map (\n -> (_node_id n, n))
116 <$> selectDocNodes cId
118 ngs <- getNodeNgrams cId lId nt repo
119 let -- uniqId is hash computed already for each document imported in database
120 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
123 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
124 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
127 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
128 $ List.map _d_hash $ Map.elems r
131 getNodeNgrams :: HasNodeError err
136 -> Cmd err (Map NodeId (Set Text))
137 getNodeNgrams cId lId' nt repo = do
139 Nothing -> defaultList cId
142 lIds <- selectNodesWithUsername NodeList userMaster
143 let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
144 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
148 $(deriveJSON (unPrefix "_c_") ''Corpus)
149 $(deriveJSON (unPrefix "_d_") ''Document)
150 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
155 -- Version number of the list