2 Module : Gargantext.API.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main exports of Gargantext:
16 {-# LANGUAGE DataKinds #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE FlexibleContexts #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE RankNTypes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23 {-# LANGUAGE TypeOperators #-}
25 module Gargantext.API.Export
28 import Data.Aeson.TH (deriveJSON)
31 import Data.Maybe (fromMaybe)
33 import Data.Text (Text)
34 import GHC.Generics (Generic)
35 import Gargantext.API.Ngrams
36 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
37 import Gargantext.API.Types (GargNoServer)
38 import Gargantext.Core.Types --
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Config (userMaster)
41 import Gargantext.Database.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
42 import Gargantext.Database.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
45 import Gargantext.Database.Schema.NodeNode (selectDocNodes)
46 import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
47 import Gargantext.Database.Utils (Cmd)
48 import Gargantext.Prelude
49 import Gargantext.Prelude.Utils (sha)
51 import qualified Data.Map as Map
52 import qualified Data.Set as Set
53 import qualified Data.List as List
58 Corpus { _c_corpus :: [Document]
64 Document { _d_document :: Node HyperdataDocument
70 Ngrams { _ng_ngrams :: [Text]
76 instance ToSchema Corpus where
77 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
79 instance ToSchema Document where
80 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
82 instance ToSchema Ngrams where
83 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
86 instance ToParamSchema Corpus where
87 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
89 instance ToParamSchema Document where
90 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
92 instance ToParamSchema Ngrams where
93 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
94 --------------------------------------------------
95 type API = Summary "Corpus Export"
97 :> QueryParam "listId" ListId
98 :> QueryParam "ngramsType" NgramsType
101 --------------------------------------------------
102 getCorpus :: CorpusId
105 -> GargNoServer Corpus
106 getCorpus cId lId nt' = do
110 Nothing -> NgramsTerms
114 <$> map (\n -> (_node_id n, n))
115 <$> selectDocNodes cId
117 ngs <- getNodeNgrams cId lId nt repo
118 let -- uniqId is hash computed already for each document imported in database
119 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
122 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
123 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
126 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
127 $ List.map _d_hash $ Map.elems r
130 getNodeNgrams :: HasNodeError err
135 -> Cmd err (Map NodeId (Set Text))
136 getNodeNgrams cId lId' nt repo = do
138 Nothing -> defaultList cId
141 lIds <- selectNodesWithUsername NodeList userMaster
142 let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
143 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
147 $(deriveJSON (unPrefix "_c_") ''Corpus)
148 $(deriveJSON (unPrefix "_d_") ''Document)
149 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
154 -- Version number of the list