1 {-# LANGUAGE DataKinds #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE TypeOperators #-}
4 {-# LANGUAGE TemplateHaskell #-}
6 module Data.Gargantext.NLP.CoreNLP where
9 import Data.Aeson.TH (deriveJSON)
12 import Data.Gargantext.Prelude
13 import Data.Gargantext.Utils.Prefix (unPrefix)
14 import Data.Text (Text)
16 import qualified Data.ByteString.Char8 as S8
17 import qualified Data.Yaml as Yaml
18 import Network.HTTP.Simple
21 data Token = Token { _tokenIndex :: Int
23 , _tokenOriginalText :: Text
25 , _tokenCharacterOffsetBegin :: Int
26 , _tokenCharacterOffsetEnd :: Int
29 , _tokenBefore :: Text
31 } deriving (Show, Generic)
32 $(deriveJSON (unPrefix "_token") ''Token)
34 data Sentence = Sentence { _sentenceIndex :: Int
35 , _sentenceTokens :: [Token]
36 } deriving (Show, Generic)
38 $(deriveJSON (unPrefix "_sentence") ''Sentence)
40 data Properties = Properties { _propertiesAnnotators :: Text
41 , _propertiesOutputFormat :: Text
42 } deriving (Show, Generic)
44 $(deriveJSON (unPrefix "_properties") ''Properties)
46 data Sentences = Sentences { sentences :: [Sentence]}
47 deriving (Show, Generic)
48 instance ToJSON Sentences
49 instance FromJSON Sentences
53 corenlpPretty :: String -> IO ()
54 corenlpPretty txt = do
55 url <- parseRequest "POST http://localhost:9000/?properties={\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
56 let request = setRequestBodyJSON txt url
57 response <- httpJSON request
59 -- putStrLn $ "The status code was: " ++
60 -- show (getResponseStatusCode response)
61 -- print $ getResponseHeader "Content-Type" response
62 S8.putStrLn $ Yaml.encode (getResponseBody response :: Sentences)
64 corenlp :: String -> IO Sentences
66 url <- parseRequest "POST http://localhost:9000/?properties={\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
67 let request = setRequestBodyJSON txt url
68 response <- httpJSON request
69 pure (getResponseBody response :: Sentences)
72 -- Part Of Speech example
73 -- parseWith _tokenPos "Hello world."
74 -- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
76 -- Named Entity Recognition example
77 -- parseWith _tokenNer "Hello world of Peter."
78 -- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
79 parseWith :: (Token -> t) -> String -> IO [[(Text, t)]]
80 parseWith f s = pm (pm (\t -> (_tokenWord t, f t))) <$> pm _sentenceTokens <$> sentences <$> corenlp s