]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Export.hs
[OPTIM] concurrent threads (fix mem leaks)
[gargantext.git] / src / Gargantext / API / Export.hs
1 {-|
2 Module : Gargantext.API.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 {-# LANGUAGE DataKinds #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE FlexibleContexts #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21 {-# LANGUAGE RankNTypes #-}
22 {-# LANGUAGE TemplateHaskell #-}
23 {-# LANGUAGE TypeOperators #-}
24
25 module Gargantext.API.Export
26 where
27
28 import Data.Aeson.TH (deriveJSON)
29 import Data.Map (Map)
30 import Data.Set (Set)
31 import Data.Maybe (fromMaybe)
32 import Data.Swagger
33 import Data.Text (Text)
34 import GHC.Generics (Generic)
35 import Gargantext.API.Ngrams
36 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
37 import Gargantext.API.Types (GargNoServer)
38 import Gargantext.Core.Types --
39 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
40 import Gargantext.Database.Config (userMaster)
41 import Gargantext.Database.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
42 import Gargantext.Database.Node.Select (selectNodesWithUsername)
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
45 import Gargantext.Database.Schema.NodeNode (selectDocNodes)
46 import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
47 import Gargantext.Database.Utils (Cmd)
48 import Gargantext.Prelude
49 import Gargantext.Prelude.Utils (sha)
50 import Servant
51 import qualified Data.Map as Map
52 import qualified Data.Set as Set
53 import qualified Data.List as List
54
55
56 -- Corpus Export
57 data Corpus =
58 Corpus { _c_corpus :: [Document]
59 , _c_hash :: Hash
60 } deriving (Generic)
61
62 -- | Document Export
63 data Document =
64 Document { _d_document :: Node HyperdataDocument
65 , _d_ngrams :: Ngrams
66 , _d_hash :: Hash
67 } deriving (Generic)
68
69 data Ngrams =
70 Ngrams { _ng_ngrams :: [Text]
71 , _ng_hash :: Hash
72 } deriving (Generic)
73
74 type Hash = Text
75 -------
76 instance ToSchema Corpus where
77 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
78
79 instance ToSchema Document where
80 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
81
82 instance ToSchema Ngrams where
83 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
84
85 -------
86 instance ToParamSchema Corpus where
87 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
88
89 instance ToParamSchema Document where
90 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
91
92 instance ToParamSchema Ngrams where
93 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
94 --------------------------------------------------
95 type API = Summary "Corpus Export"
96 :> "export"
97 :> QueryParam "listId" ListId
98 :> QueryParam "ngramsType" NgramsType
99 :> Get '[JSON] Corpus
100
101 --------------------------------------------------
102 getCorpus :: CorpusId
103 -> Maybe ListId
104 -> Maybe NgramsType
105 -> GargNoServer Corpus
106 getCorpus cId lId nt' = do
107
108 let
109 nt = case nt' of
110 Nothing -> NgramsTerms
111 Just t -> t
112
113 ns <- Map.fromList
114 <$> map (\n -> (_node_id n, n))
115 <$> selectDocNodes cId
116 repo <- getRepo
117 ngs <- getNodeNgrams cId lId nt repo
118 let -- uniqId is hash computed already for each document imported in database
119 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
120 ) ns ngs
121 where
122 ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
123 d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
124 <> (ng_hash b)
125
126 pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
127 $ List.map _d_hash $ Map.elems r
128 )
129
130 getNodeNgrams :: HasNodeError err
131 => CorpusId
132 -> Maybe ListId
133 -> NgramsType
134 -> NgramsRepo
135 -> Cmd err (Map NodeId (Set Text))
136 getNodeNgrams cId lId' nt repo = do
137 lId <- case lId' of
138 Nothing -> defaultList cId
139 Just l -> pure l
140
141 lIds <- selectNodesWithUsername NodeList userMaster
142 let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
143 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
144 pure r
145
146
147 $(deriveJSON (unPrefix "_c_") ''Corpus)
148 $(deriveJSON (unPrefix "_d_") ''Document)
149 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
150
151
152 -- TODO
153 -- Exports List
154 -- Version number of the list
155
156