]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
refactoring 2021, remove old phylo files, keep API
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE TypeOperators #-}
18 {-# OPTIONS_GHC -fno-warn-orphans #-}
19
20 module Gargantext.API.Node.Corpus.Export
21 where
22
23
24 import Data.Aeson.TH (deriveJSON)
25 import Data.Map (Map)
26 import Data.Maybe (fromMaybe)
27 import Data.Set (Set)
28 import Data.Swagger
29 import Data.Text (Text)
30 import GHC.Generics (Generic)
31 import Gargantext.API.Ngrams.Types
32 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
33 import Gargantext.API.Prelude (GargNoServer)
34 import Gargantext.Prelude.Crypto.Hash (hash)
35 import Gargantext.Core.Types
36 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
37 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
38 import Gargantext.Database.Admin.Config (userMaster)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
40 import Gargantext.Database.Prelude (Cmd)
41 import Gargantext.Database.Query.Table.Node
42 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
43 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
44 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
47 import Gargantext.Prelude
48 import Servant
49 import qualified Data.List as List
50 import qualified Data.Map as Map
51 import qualified Data.Set as Set
52
53
54 -- Corpus Export
55 data Corpus =
56 Corpus { _c_corpus :: [Document]
57 , _c_hash :: Hash
58 } deriving (Generic)
59
60 -- | Document Export
61 data Document =
62 Document { _d_document :: Node HyperdataDocument
63 , _d_ngrams :: Ngrams
64 , _d_hash :: Hash
65 } deriving (Generic)
66
67 data Ngrams =
68 Ngrams { _ng_ngrams :: [Text]
69 , _ng_hash :: Hash
70 } deriving (Generic)
71
72 type Hash = Text
73 -------
74 instance ToSchema Corpus where
75 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
76
77 instance ToSchema Document where
78 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
79
80 instance ToSchema Ngrams where
81 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
82
83 instance (ToSchema a) => ToSchema (Node a) where
84 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_node_")
85
86 -------
87 instance ToParamSchema Corpus where
88 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
89
90 instance ToParamSchema Document where
91 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
92
93 instance ToParamSchema Ngrams where
94 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
95 --------------------------------------------------
96 type API = Summary "Corpus Export"
97 :> "export"
98 :> QueryParam "listId" ListId
99 :> QueryParam "ngramsType" NgramsType
100 :> Get '[JSON] Corpus
101
102 --------------------------------------------------
103 -- | Hashes are ordered by Set
104 getCorpus :: CorpusId
105 -> Maybe ListId
106 -> Maybe NgramsType
107 -> GargNoServer Corpus
108 getCorpus cId lId nt' = do
109
110 let
111 nt = case nt' of
112 Nothing -> NgramsTerms
113 Just t -> t
114
115 ns <- Map.fromList
116 <$> map (\n -> (_node_id n, n))
117 <$> selectDocNodes cId
118 repo <- getRepo
119 ngs <- getNodeNgrams cId lId nt repo
120 let -- uniqId is hash computed already for each document imported in database
121 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)
122 ) ns ngs
123 where
124 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
125 , hash b
126 ]
127 pure $ Corpus (Map.elems r) (hash $ List.map _d_hash
128 $ Map.elems r
129 )
130
131 getNodeNgrams :: HasNodeError err
132 => CorpusId
133 -> Maybe ListId
134 -> NgramsType
135 -> NgramsRepo
136 -> Cmd err (Map NodeId (Set Text))
137 getNodeNgrams cId lId' nt repo = do
138 lId <- case lId' of
139 Nothing -> defaultList cId
140 Just l -> pure l
141
142 lIds <- selectNodesWithUsername NodeList userMaster
143 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
144 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
145 pure r
146
147
148 $(deriveJSON (unPrefix "_c_") ''Corpus)
149 $(deriveJSON (unPrefix "_d_") ''Document)
150 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
151
152
153 -- TODO
154 -- Exports List
155 -- Version number of the list
156
157