2 Module : Gargantext.Utils.JohnSnow
3 Description : John Snow NLP API connexion
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Utils.JohnSnowNLP where
16 import Control.Concurrent (threadDelay)
18 import Data.Aeson (encode, ToJSON, toJSON, FromJSON, parseJSON, Value(..), (.:), (.:?))
19 import Data.Aeson.TH (deriveJSON)
20 import Data.Aeson.Types (prependFailure, typeMismatch)
21 import Data.Map.Strict (Map)
22 import Data.Maybe (fromMaybe)
23 import Data.Text hiding (map, group, filter, concat, zip)
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
26 import Gargantext.Core.Types (POS(..))
27 import Gargantext.Core.Utils.Prefix (unPrefix)
28 import Gargantext.Prelude
29 import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
30 import qualified Data.List.Safe as LS
31 import qualified Data.Map.Strict as Map
34 data JSSpell = JSPOS Lang | JSLemma Lang
37 instance ToJSON JSSpell where
38 toJSON (JSPOS All) = "pos"
39 toJSON (JSPOS DE) = "de.pos"
40 toJSON (JSPOS EL) = "el.pos"
41 toJSON (JSPOS EN) = "en.pos"
42 toJSON (JSPOS ES) = "es.pos"
43 toJSON (JSPOS FR) = "fr.pos"
44 toJSON (JSPOS IT) = "it.pos"
45 toJSON (JSPOS PL) = "pl.pos"
46 toJSON (JSPOS PT) = "pt.pos"
47 toJSON (JSPOS RU) = "ru.pos"
48 toJSON (JSPOS UK) = "uk.pos"
49 toJSON (JSPOS ZH) = "zh.pos"
51 toJSON (JSLemma All) = "lemma"
52 toJSON (JSLemma DE) = "de.lemma"
53 toJSON (JSLemma EL) = "el.lemma"
54 toJSON (JSLemma EN) = "en.lemma"
55 toJSON (JSLemma ES) = "es.lemma"
56 toJSON (JSLemma FR) = "fr.lemma"
57 toJSON (JSLemma IT) = "it.lemma"
58 toJSON (JSLemma PL) = "pl.lemma"
59 toJSON (JSLemma PT) = "pt.lemma"
60 toJSON (JSLemma RU) = "ru.lemma"
61 toJSON (JSLemma UK) = "uk.lemma"
62 toJSON (JSLemma ZH) = "zh.lemma"
64 instance FromJSON JSSpell where
65 parseJSON (String "de.pos") = pure $ JSPOS DE
66 parseJSON (String "en.pos") = pure $ JSPOS EN
67 parseJSON (String "el.pos") = pure $ JSPOS EL
68 parseJSON (String "es.pos") = pure $ JSPOS ES
69 parseJSON (String "fr.pos") = pure $ JSPOS FR
70 parseJSON (String "it.pos") = pure $ JSPOS IT
71 parseJSON (String "pl.pos") = pure $ JSPOS PL
72 parseJSON (String "pt.pos") = pure $ JSPOS PT
73 parseJSON (String "ru.pos") = pure $ JSPOS RU
74 parseJSON (String "uk.pos") = pure $ JSPOS UK
75 parseJSON (String "zh.pos") = pure $ JSPOS ZH
76 parseJSON (String "pos") = pure $ JSPOS All
78 parseJSON (String "de.lemma") = pure $ JSLemma DE
79 parseJSON (String "en.lemma") = pure $ JSLemma EN
80 parseJSON (String "el.lemma") = pure $ JSLemma EL
81 parseJSON (String "es.lemma") = pure $ JSLemma ES
82 parseJSON (String "fr.lemma") = pure $ JSLemma FR
83 parseJSON (String "it.lemma") = pure $ JSLemma IT
84 parseJSON (String "pl.lemma") = pure $ JSLemma PL
85 parseJSON (String "pt.lemma") = pure $ JSLemma PT
86 parseJSON (String "ru.lemma") = pure $ JSLemma RU
87 parseJSON (String "uk.lemma") = pure $ JSLemma UK
88 parseJSON (String "zh.lemma") = pure $ JSLemma ZH
89 parseJSON (String "lemma") = pure $ JSLemma All
91 prependFailure "parsing spell failed, "
92 (typeMismatch "Spell" s)
95 JSRequest { _jsRequest_data :: !Text
96 , _jsRequest_format :: !Text
97 , _jsRequest_grouping :: !(Maybe Text)
98 , _jsRequest_spell :: !JSSpell }
102 -- https://nlu.johnsnowlabs.com/docs/en/spellbook
104 deriveJSON (unPrefix "_jsRequest_") ''JSRequest
106 -- | JohnSnow NLP works via asynchronous tasks: send a query and get a
107 -- task in response. One must poll for task status and then get it's
110 JSAsyncTask { _jsAsyncTask_uuid :: !Text }
113 deriveJSON (unPrefix "_jsAsyncTask_") ''JSAsyncTask
116 data JSAsyncTaskStatus =
117 JSAsyncTaskStatus { _jsAsyncTaskStatus_code :: !Text
118 , _jsAsyncTaskStatus_message :: !(Maybe Text) }
121 taskReady :: JSAsyncTaskStatus -> Bool
122 taskReady (JSAsyncTaskStatus { .. }) = _jsAsyncTaskStatus_code == "success"
124 --deriveJSON (unPrefix "_jsAsyncTaskStatus_") ''JSAsyncTaskStatus
125 instance FromJSON JSAsyncTaskStatus where
126 parseJSON (Object v) = do
127 status <- v .: "status"
128 code <- status .: "code"
129 message <- status .:? "message"
130 pure $ JSAsyncTaskStatus { _jsAsyncTaskStatus_code = code
131 , _jsAsyncTaskStatus_message = message }
133 prependFailure "parsing status failed"
134 (typeMismatch "status" s)
136 -- | Response for our query. The `Maybe` types are here because we
137 -- combine 2 types of responses into one: `pos` and `lemma`.
138 data JSAsyncTaskResponse =
139 JSAsyncTaskResponse { _jsAsyncTaskResponse_index :: Map Text Int
140 , _jsAsyncTaskResponse_document :: Map Text Text
141 , _jsAsyncTaskResponse_sentence :: Map Text [Text]
142 , _jsAsyncTaskResponse_lem :: Maybe (Map Text [Text])
143 , _jsAsyncTaskResponse_pos :: Maybe (Map Text [POS])
144 , _jsAsyncTaskResponse_token :: Map Text [Text] }
147 deriveJSON (unPrefix "_jsAsyncTaskResponse_") ''JSAsyncTaskResponse
148 makeLenses ''JSAsyncTaskResponse
150 -- | We need to combine 2 responses: `pos` and `lemma` spells.
151 jsAsyncTaskResponseToSentences :: JSAsyncTaskResponse -> JSAsyncTaskResponse -> PosSentences
152 jsAsyncTaskResponseToSentences jsPos jsLemma =
153 PosSentences { _sentences }
155 _sentences = Map.elems $ Map.mapWithKey mapSentence (jsPos ^. jsAsyncTaskResponse_sentence)
156 mapSentence idx sentence = Sentence { _sentenceIndex = sIndex
157 , _sentenceTokens = sTokens }
159 sIndex = Map.findWithDefault (-1) idx (jsPos ^. jsAsyncTaskResponse_index)
160 lemmas = fromMaybe [] $
161 if Just sentence == Map.lookup idx (jsLemma ^. jsAsyncTaskResponse_sentence) then
162 Map.lookup idx $ fromMaybe Map.empty (jsLemma ^. jsAsyncTaskResponse_lem)
165 sTokens = imap mapPosToken $ zip (Map.findWithDefault [] idx $ fromMaybe Map.empty (jsPos ^. jsAsyncTaskResponse_pos))
166 (Map.findWithDefault [] idx (jsPos ^. jsAsyncTaskResponse_token))
167 mapPosToken idx' (pos, token) = Token { _tokenIndex = -1
169 , _tokenOriginalText = ""
170 , _tokenLemma = fromMaybe "" $ (LS.!!) lemmas idx'
171 , _tokenCharacterOffsetBegin = -1
172 , _tokenCharacterOffsetEnd = -1
173 , _tokenPos = Just pos
174 , _tokenNer = Nothing
175 , _tokenBefore = Nothing
176 , _tokenAfter = Nothing }
178 -----------------------------------------------------
180 jsRequest :: Text -> JSSpell -> IO JSAsyncTask
182 url <- parseRequest $ "POST http://localhost:5000/api/results"
183 let jsReq = JSRequest { _jsRequest_data = t
184 , _jsRequest_format = "text"
185 , _jsRequest_grouping = Nothing
186 , _jsRequest_spell = s }
187 let request = setRequestBodyLBS (encode jsReq) url
188 task <- httpJSON request :: IO (Response JSAsyncTask)
189 pure $ getResponseBody task
191 jsTaskStatus :: JSAsyncTask -> IO JSAsyncTaskStatus
192 jsTaskStatus (JSAsyncTask uuid) = do
193 url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid <> "/status"
194 status <- httpJSON url
195 pure $ getResponseBody status
197 jsTaskResponse :: JSAsyncTask -> IO JSAsyncTaskResponse
198 jsTaskResponse (JSAsyncTask uuid) = do
199 url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid
200 result <- httpJSON url
201 pure $ getResponseBody result
203 waitForJsTask :: JSAsyncTask -> IO JSAsyncTaskResponse
204 waitForJsTask jsTask = wait' 0
206 wait' :: Int -> IO JSAsyncTaskResponse
208 status <- jsTaskStatus jsTask
209 if taskReady status then
210 jsTaskResponse jsTask
213 panic "[waitForJsTask] waited for 1 minute and still no answer from JohnSnow NLP"
215 -- printDebug "[waitForJsTask] task not ready, waiting" counter
216 _ <- threadDelay $ 1000000*1
219 getPosTagAndLems :: Lang -> Text -> IO PosSentences
220 getPosTagAndLems l t = do
221 jsPosTask <- jsRequest t (JSPOS l)
222 jsLemmaTask <- jsRequest t (JSLemma l)
224 -- wait for both tasks
225 jsPos <- waitForJsTask jsPosTask
226 jsLemma <- waitForJsTask jsLemmaTask
228 pure $ jsAsyncTaskResponseToSentences jsPos jsLemma
230 nlp :: Lang -> Text -> IO PosSentences
231 nlp = getPosTagAndLems