]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Export.hs
[phylo] filter threshold change
[gargantext.git] / src / Gargantext / API / Node / Corpus / Export.hs
1 {-|
2 Module : Gargantext.API.Node.Corpus.Export
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main exports of Gargantext:
11 - corpus
12 - document and ngrams
13 - lists
14 -}
15
16 {-# LANGUAGE TemplateHaskell #-}
17 {-# LANGUAGE TypeOperators #-}
18 {-# OPTIONS_GHC -fno-warn-orphans #-}
19
20 module Gargantext.API.Node.Corpus.Export
21 where
22
23
24 import Data.Aeson.TH (deriveJSON)
25 import Data.Map (Map)
26 import Data.Maybe (fromMaybe)
27 import Data.Set (Set)
28 import Data.Swagger
29 import Data.Text (Text)
30 import GHC.Generics (Generic)
31 import Gargantext.API.Ngrams
32 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
33 import Gargantext.API.Prelude (GargNoServer)
34 import Gargantext.Prelude.Crypto.Hash (hash)
35 import Gargantext.Core.Types
36 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
37 import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
38 import Gargantext.Database.Admin.Config (userMaster)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
40 import Gargantext.Database.Admin.Types.Node (Node, NodeId, ListId, CorpusId)
41 import Gargantext.Database.Prelude (Cmd)
42 import Gargantext.Database.Query.Table.Node
43 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
44 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
45 import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
46 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
47 import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
48 import Gargantext.Prelude
49 import Servant
50 import qualified Data.List as List
51 import qualified Data.Map as Map
52 import qualified Data.Set as Set
53
54
55 -- Corpus Export
56 data Corpus =
57 Corpus { _c_corpus :: [Document]
58 , _c_hash :: Hash
59 } deriving (Generic)
60
61 -- | Document Export
62 data Document =
63 Document { _d_document :: Node HyperdataDocument
64 , _d_ngrams :: Ngrams
65 , _d_hash :: Hash
66 } deriving (Generic)
67
68 data Ngrams =
69 Ngrams { _ng_ngrams :: [Text]
70 , _ng_hash :: Hash
71 } deriving (Generic)
72
73 type Hash = Text
74 -------
75 instance ToSchema Corpus where
76 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
77
78 instance ToSchema Document where
79 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
80
81 instance ToSchema Ngrams where
82 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
83
84 instance (ToSchema a) => ToSchema (Node a) where
85 declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_node_")
86
87 -------
88 instance ToParamSchema Corpus where
89 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
90
91 instance ToParamSchema Document where
92 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
93
94 instance ToParamSchema Ngrams where
95 toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
96 --------------------------------------------------
97 type API = Summary "Corpus Export"
98 :> "export"
99 :> QueryParam "listId" ListId
100 :> QueryParam "ngramsType" NgramsType
101 :> Get '[JSON] Corpus
102
103 --------------------------------------------------
104 -- | Hashes are ordered by Set
105 getCorpus :: CorpusId
106 -> Maybe ListId
107 -> Maybe NgramsType
108 -> GargNoServer Corpus
109 getCorpus cId lId nt' = do
110
111 let
112 nt = case nt' of
113 Nothing -> NgramsTerms
114 Just t -> t
115
116 ns <- Map.fromList
117 <$> map (\n -> (_node_id n, n))
118 <$> selectDocNodes cId
119 repo <- getRepo
120 ngs <- getNodeNgrams cId lId nt repo
121 let -- uniqId is hash computed already for each document imported in database
122 r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)
123 ) ns ngs
124 where
125 d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
126 , hash b
127 ]
128 pure $ Corpus (Map.elems r) (hash $ List.map _d_hash
129 $ Map.elems r
130 )
131
132 getNodeNgrams :: HasNodeError err
133 => CorpusId
134 -> Maybe ListId
135 -> NgramsType
136 -> NgramsRepo
137 -> Cmd err (Map NodeId (Set Text))
138 getNodeNgrams cId lId' nt repo = do
139 lId <- case lId' of
140 Nothing -> defaultList cId
141 Just l -> pure l
142
143 lIds <- selectNodesWithUsername NodeList userMaster
144 let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
145 r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
146 pure r
147
148
149 $(deriveJSON (unPrefix "_c_") ''Corpus)
150 $(deriveJSON (unPrefix "_d_") ''Document)
151 $(deriveJSON (unPrefix "_ng_") ''Ngrams)
152
153
154 -- TODO
155 -- Exports List
156 -- Version number of the list
157
158