2 Module : Gargantext.Utils.JohnSnow
3 Description : John Snow NLP API connexion
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Utils.JohnSnowNLP where
16 import Control.Concurrent (threadDelay)
18 import Data.Aeson (encode, ToJSON, toJSON, FromJSON, parseJSON, Value(..), (.:), (.:?))
19 import Data.Aeson.Types (prependFailure, typeMismatch)
20 import Data.Aeson.TH (deriveJSON)
21 import qualified Data.List.Safe as LS
22 import Data.Map.Strict (Map)
23 import qualified Data.Map.Strict as Map
24 import Data.Maybe (fromMaybe)
25 import Data.Text hiding (map, group, filter, concat, zip)
26 import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
28 import Gargantext.Prelude
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Types (POS(..))
31 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
32 import Gargantext.Core.Utils.Prefix (unPrefix)
35 data JSSpell = JSPOS Lang | JSLemma Lang
38 instance ToJSON JSSpell where
39 toJSON (JSPOS EN) = "en.pos"
40 toJSON (JSPOS FR) = "fr.pos"
41 toJSON (JSPOS DE) = "de.pos"
42 toJSON (JSPOS ES) = "es.pos"
43 toJSON (JSPOS IT) = "it.pos"
44 toJSON (JSPOS PL) = "pl.pos"
45 toJSON (JSPOS CN) = "cn.pos"
46 toJSON (JSPOS All) = "pos"
48 toJSON (JSLemma EN) = "en.lemma"
49 toJSON (JSLemma FR) = "fr.lemma"
50 toJSON (JSLemma DE) = "de.lemma"
51 toJSON (JSLemma ES) = "es.lemma"
52 toJSON (JSLemma IT) = "it.lemma"
53 toJSON (JSLemma PL) = "pl.lemma"
54 toJSON (JSLemma CN) = "cn.lemma"
55 toJSON (JSLemma All) = "lemma"
57 instance FromJSON JSSpell where
58 parseJSON (String "en.pos") = pure $ JSPOS EN
59 parseJSON (String "fr.pos") = pure $ JSPOS FR
60 parseJSON (String "de.pos") = pure $ JSPOS DE
61 parseJSON (String "es.pos") = pure $ JSPOS ES
62 parseJSON (String "it.pos") = pure $ JSPOS IT
63 parseJSON (String "pl.pos") = pure $ JSPOS PL
64 parseJSON (String "cn.pos") = pure $ JSPOS CN
65 parseJSON (String "pos") = pure $ JSPOS All
66 parseJSON (String "en.lemma") = pure $ JSLemma EN
67 parseJSON (String "fr.lemma") = pure $ JSLemma FR
68 parseJSON (String "de.lemma") = pure $ JSLemma DE
69 parseJSON (String "es.lemma") = pure $ JSLemma ES
70 parseJSON (String "it.lemma") = pure $ JSLemma IT
71 parseJSON (String "pl.lemma") = pure $ JSLemma PL
72 parseJSON (String "cn.lemma") = pure $ JSLemma CN
73 parseJSON (String "lemma") = pure $ JSLemma All
75 prependFailure "parsing spell failed, "
76 (typeMismatch "Spell" s)
79 JSRequest { _jsRequest_data :: !Text
80 , _jsRequest_format :: !Text
81 , _jsRequest_grouping :: !(Maybe Text)
82 , _jsRequest_spell :: !JSSpell }
86 -- https://nlu.johnsnowlabs.com/docs/en/spellbook
88 deriveJSON (unPrefix "_jsRequest_") ''JSRequest
90 -- | JohnSnow NLP works via asynchronous tasks: send a query and get a
91 -- task in response. One must poll for task status and then get it's
94 JSAsyncTask { _jsAsyncTask_uuid :: !Text }
97 deriveJSON (unPrefix "_jsAsyncTask_") ''JSAsyncTask
100 data JSAsyncTaskStatus =
101 JSAsyncTaskStatus { _jsAsyncTaskStatus_code :: !Text
102 , _jsAsyncTaskStatus_message :: !(Maybe Text) }
105 taskReady :: JSAsyncTaskStatus -> Bool
106 taskReady (JSAsyncTaskStatus { .. }) = _jsAsyncTaskStatus_code == "success"
108 --deriveJSON (unPrefix "_jsAsyncTaskStatus_") ''JSAsyncTaskStatus
109 instance FromJSON JSAsyncTaskStatus where
110 parseJSON (Object v) = do
111 status <- v .: "status"
112 code <- status .: "code"
113 message <- status .:? "message"
114 pure $ JSAsyncTaskStatus { _jsAsyncTaskStatus_code = code
115 , _jsAsyncTaskStatus_message = message }
117 prependFailure "parsing status failed"
118 (typeMismatch "status" s)
120 -- | Response for our query. The `Maybe` types are here because we
121 -- combine 2 types of responses into one: `pos` and `lemma`.
122 data JSAsyncTaskResponse =
123 JSAsyncTaskResponse { _jsAsyncTaskResponse_index :: Map Text Int
124 , _jsAsyncTaskResponse_document :: Map Text Text
125 , _jsAsyncTaskResponse_sentence :: Map Text [Text]
126 , _jsAsyncTaskResponse_lem :: Maybe (Map Text [Text])
127 , _jsAsyncTaskResponse_pos :: Maybe (Map Text [POS])
128 , _jsAsyncTaskResponse_token :: Map Text [Text] }
131 deriveJSON (unPrefix "_jsAsyncTaskResponse_") ''JSAsyncTaskResponse
132 makeLenses ''JSAsyncTaskResponse
134 -- | We need to combine 2 responses: `pos` and `lemma` spells.
135 jsAsyncTaskResponseToSentences :: JSAsyncTaskResponse -> JSAsyncTaskResponse -> PosSentences
136 jsAsyncTaskResponseToSentences jsPos jsLemma =
137 PosSentences { _sentences }
139 _sentences = Map.elems $ Map.mapWithKey mapSentence (jsPos ^. jsAsyncTaskResponse_sentence)
140 mapSentence idx sentence = Sentence { _sentenceIndex = sIndex
141 , _sentenceTokens = sTokens }
143 sIndex = Map.findWithDefault (-1) idx (jsPos ^. jsAsyncTaskResponse_index)
144 lemmas = fromMaybe [] $
145 if Just sentence == Map.lookup idx (jsLemma ^. jsAsyncTaskResponse_sentence) then
146 Map.lookup idx $ fromMaybe Map.empty (jsLemma ^. jsAsyncTaskResponse_lem)
149 sTokens = imap mapPosToken $ zip (Map.findWithDefault [] idx $ fromMaybe Map.empty (jsPos ^. jsAsyncTaskResponse_pos))
150 (Map.findWithDefault [] idx (jsPos ^. jsAsyncTaskResponse_token))
151 mapPosToken idx' (pos, token) = Token { _tokenIndex = -1
153 , _tokenOriginalText = ""
154 , _tokenLemma = fromMaybe "" $ (LS.!!) lemmas idx'
155 , _tokenCharacterOffsetBegin = -1
156 , _tokenCharacterOffsetEnd = -1
157 , _tokenPos = Just pos
158 , _tokenNer = Nothing
159 , _tokenBefore = Nothing
160 , _tokenAfter = Nothing }
162 -----------------------------------------------------
164 jsRequest :: Text -> JSSpell -> IO JSAsyncTask
166 url <- parseRequest $ "POST http://localhost:5000/api/results"
167 let jsReq = JSRequest { _jsRequest_data = t
168 , _jsRequest_format = "text"
169 , _jsRequest_grouping = Nothing
170 , _jsRequest_spell = s }
171 let request = setRequestBodyLBS (encode jsReq) url
172 task <- httpJSON request :: IO (Response JSAsyncTask)
173 pure $ getResponseBody task
175 jsTaskStatus :: JSAsyncTask -> IO JSAsyncTaskStatus
176 jsTaskStatus (JSAsyncTask uuid) = do
177 url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid <> "/status"
178 status <- httpJSON url
179 pure $ getResponseBody status
181 jsTaskResponse :: JSAsyncTask -> IO JSAsyncTaskResponse
182 jsTaskResponse (JSAsyncTask uuid) = do
183 url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid
184 result <- httpJSON url
185 pure $ getResponseBody result
187 waitForJsTask :: JSAsyncTask -> IO JSAsyncTaskResponse
188 waitForJsTask jsTask = wait' 0
190 wait' :: Int -> IO JSAsyncTaskResponse
192 status <- jsTaskStatus jsTask
193 if taskReady status then
194 jsTaskResponse jsTask
197 panic "[waitForJsTask] waited for 1 minute and still no answer from JohnSnow NLP"
199 -- printDebug "[waitForJsTask] task not ready, waiting" counter
200 _ <- threadDelay $ 1000000*1
203 getPosTagAndLems :: Lang -> Text -> IO PosSentences
204 getPosTagAndLems l t = do
205 jsPosTask <- jsRequest t (JSPOS l)
206 jsLemmaTask <- jsRequest t (JSLemma l)
208 -- wait for both tasks
209 jsPos <- waitForJsTask jsPosTask
210 jsLemma <- waitForJsTask jsLemmaTask
212 pure $ jsAsyncTaskResponseToSentences jsPos jsLemma
214 nlp :: Lang -> Text -> IO PosSentences
215 nlp = getPosTagAndLems