2 Module : Gargantext.Utils.JohnSnow
3 Description : John Snow NLP API connexion
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE TemplateHaskell #-}
14 module Gargantext.Utils.JohnSnowNLP where
16 import Control.Concurrent (threadDelay)
18 import Data.Aeson (encode, ToJSON, toJSON, FromJSON, parseJSON, Value(..), (.:), (.:?))
19 import Data.Aeson.Types (prependFailure, typeMismatch)
20 import Data.Aeson.TH (deriveJSON)
21 import qualified Data.List.Safe as LS
23 import qualified Data.Map as Map
24 import Data.Maybe (fromMaybe)
25 import Data.Text hiding (map, group, filter, concat, zip)
26 import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
28 import Gargantext.Prelude
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Types (POS(..))
31 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
32 import Gargantext.Core.Utils.Prefix (unPrefix)
35 data JSSpell = JSPOS Lang | JSLemma Lang
38 instance ToJSON JSSpell where
39 toJSON (JSPOS EN) = "en.pos"
40 toJSON (JSPOS FR) = "fr.pos"
41 toJSON (JSPOS All) = "pos"
42 toJSON (JSLemma EN) = "en.lemma"
43 toJSON (JSLemma FR) = "fr.lemma"
44 toJSON (JSLemma All) = "lemma"
46 instance FromJSON JSSpell where
47 parseJSON (String "en.pos") = pure $ JSPOS EN
48 parseJSON (String "fr.pos") = pure $ JSPOS FR
49 parseJSON (String "pos") = pure $ JSPOS All
50 parseJSON (String "en.lemma") = pure $ JSLemma EN
51 parseJSON (String "fr.lemma") = pure $ JSLemma FR
52 parseJSON (String "lemma") = pure $ JSLemma All
54 prependFailure "parsing spell failed, "
55 (typeMismatch "Spell" s)
58 JSRequest { _jsRequest_data :: !Text
59 , _jsRequest_format :: !Text
60 , _jsRequest_grouping :: !(Maybe Text)
61 , _jsRequest_spell :: !JSSpell }
65 -- https://nlu.johnsnowlabs.com/docs/en/spellbook
67 deriveJSON (unPrefix "_jsRequest_") ''JSRequest
69 -- | JohnSnow NLP works via asynchronous tasks: send a query and get a
70 -- task in response. One must poll for task status and then get it's
73 JSAsyncTask { _jsAsyncTask_uuid :: !Text }
76 deriveJSON (unPrefix "_jsAsyncTask_") ''JSAsyncTask
79 data JSAsyncTaskStatus =
80 JSAsyncTaskStatus { _jsAsyncTaskStatus_code :: !Text
81 , _jsAsyncTaskStatus_message :: !(Maybe Text) }
84 taskReady :: JSAsyncTaskStatus -> Bool
85 taskReady (JSAsyncTaskStatus { .. }) = _jsAsyncTaskStatus_code == "success"
87 --deriveJSON (unPrefix "_jsAsyncTaskStatus_") ''JSAsyncTaskStatus
88 instance FromJSON JSAsyncTaskStatus where
89 parseJSON (Object v) = do
90 status <- v .: "status"
91 code <- status .: "code"
92 message <- status .:? "message"
93 pure $ JSAsyncTaskStatus { _jsAsyncTaskStatus_code = code
94 , _jsAsyncTaskStatus_message = message }
96 prependFailure "parsing status failed"
97 (typeMismatch "status" s)
99 -- | Response for our query. The `Maybe` types are here because we
100 -- combine 2 types of responses into one: `pos` and `lemma`.
101 data JSAsyncTaskResponse =
102 JSAsyncTaskResponse { _jsAsyncTaskResponse_index :: Map Text Int
103 , _jsAsyncTaskResponse_document :: Map Text Text
104 , _jsAsyncTaskResponse_sentence :: Map Text [Text]
105 , _jsAsyncTaskResponse_lem :: Maybe (Map Text [Text])
106 , _jsAsyncTaskResponse_pos :: Maybe (Map Text [POS])
107 , _jsAsyncTaskResponse_token :: Map Text [Text] }
110 deriveJSON (unPrefix "_jsAsyncTaskResponse_") ''JSAsyncTaskResponse
111 makeLenses ''JSAsyncTaskResponse
113 -- | We need to combine 2 responses: `pos` and `lemma` spells.
114 jsAsyncTaskResponseToSentences :: JSAsyncTaskResponse -> JSAsyncTaskResponse -> PosSentences
115 jsAsyncTaskResponseToSentences jsPos jsLemma =
116 PosSentences { _sentences }
118 _sentences = Map.elems $ Map.mapWithKey mapSentence (jsPos ^. jsAsyncTaskResponse_sentence)
119 mapSentence idx sentence = Sentence { _sentenceIndex = sIndex
120 , _sentenceTokens = sTokens }
122 sIndex = Map.findWithDefault (-1) idx (jsPos ^. jsAsyncTaskResponse_index)
123 lemmas = fromMaybe [] $
124 if Just sentence == Map.lookup idx (jsLemma ^. jsAsyncTaskResponse_sentence) then
125 Map.lookup idx $ fromMaybe Map.empty (jsLemma ^. jsAsyncTaskResponse_lem)
128 sTokens = imap mapPosToken $ zip (Map.findWithDefault [] idx $ fromMaybe Map.empty (jsPos ^. jsAsyncTaskResponse_pos))
129 (Map.findWithDefault [] idx (jsPos ^. jsAsyncTaskResponse_token))
130 mapPosToken idx' (pos, token) = Token { _tokenIndex = -1
132 , _tokenOriginalText = ""
133 , _tokenLemma = fromMaybe "" $ (LS.!!) lemmas idx'
134 , _tokenCharacterOffsetBegin = -1
135 , _tokenCharacterOffsetEnd = -1
136 , _tokenPos = Just pos
137 , _tokenNer = Nothing
138 , _tokenBefore = Nothing
139 , _tokenAfter = Nothing }
141 -----------------------------------------------------
143 jsRequest :: Text -> JSSpell -> IO JSAsyncTask
145 url <- parseRequest $ "POST http://localhost:5000/api/results"
146 let jsReq = JSRequest { _jsRequest_data = t
147 , _jsRequest_format = "text"
148 , _jsRequest_grouping = Nothing
149 , _jsRequest_spell = s }
150 let request = setRequestBodyLBS (encode jsReq) url
151 task <- httpJSON request :: IO (Response JSAsyncTask)
152 pure $ getResponseBody task
154 jsTaskStatus :: JSAsyncTask -> IO JSAsyncTaskStatus
155 jsTaskStatus (JSAsyncTask uuid) = do
156 url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid <> "/status"
157 status <- httpJSON url
158 pure $ getResponseBody status
160 jsTaskResponse :: JSAsyncTask -> IO JSAsyncTaskResponse
161 jsTaskResponse (JSAsyncTask uuid) = do
162 url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid
163 result <- httpJSON url
164 pure $ getResponseBody result
166 waitForJsTask :: JSAsyncTask -> IO JSAsyncTaskResponse
167 waitForJsTask jsTask = wait' 0
169 wait' :: Int -> IO JSAsyncTaskResponse
171 status <- jsTaskStatus jsTask
172 if taskReady status then
173 jsTaskResponse jsTask
176 panic "[waitForJsTask] waited for 1 minute and still no answer from JohnSnow NLP"
178 printDebug "[waitForJsTask] task not ready, waiting" counter
179 _ <- threadDelay $ 1000000*1
182 getPosTagAndLems :: Lang -> Text -> IO PosSentences
183 getPosTagAndLems l t = do
184 jsPosTask <- jsRequest t (JSPOS l)
185 jsLemmaTask <- jsRequest t (JSLemma l)
187 -- wait for both tasks
188 jsPos <- waitForJsTask jsPosTask
189 jsLemma <- waitForJsTask jsLemmaTask
191 pure $ jsAsyncTaskResponseToSentences jsPos jsLemma
193 nlp :: Lang -> Text -> IO PosSentences
194 nlp = getPosTagAndLems