]> Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/NLP/CoreNLP.hs
[FEAT] NLP functions. Servant ClientM need to be fixed.
[gargantext.git] / src / Data / Gargantext / NLP / CoreNLP.hs
1 {-# LANGUAGE DataKinds #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE TypeOperators #-}
4 {-# LANGUAGE TemplateHaskell #-}
5
6 module Data.Gargantext.NLP.CoreNLP where
7
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Proxy
import GHC.Generics
import Network.HTTP.Client (newManager, defaultManagerSettings)
import Servant.API
import Servant.Client

import Data.Gargantext.Prelude
import Data.Gargantext.Utils.Prefix (unPrefix)
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
19
-- | One token of a CoreNLP annotation.
--
-- The fields mirror the keys of the token objects in the sample response
-- quoted further down in this module.  The JSON instances are derived with
-- the options returned by @unPrefix "_token"@ (presumably dropping the
-- @_token@ prefix from each field name; see "Data.Gargantext.Utils.Prefix").
data Token = Token
  { _tokenIndex                :: Int   -- ^ @"index"@ key.
  , _tokenWord                 :: Text  -- ^ @"word"@ key.
  , _tokenOriginalText         :: Text  -- ^ @"originalText"@ key.
  , _tokenLemma                :: Text  -- ^ @"lemma"@ key.
  , _tokenCharacterOffsetBegin :: Int   -- ^ @"characterOffsetBegin"@ key.
  , _tokenCharacterOffsetEnd   :: Int   -- ^ @"characterOffsetEnd"@ key.
  , _tokenPos                  :: Text  -- ^ @"pos"@ key, e.g. @"NNP"@.
  , _tokenNer                  :: Text  -- ^ @"ner"@ key, e.g. @"PERSON"@.
  , _tokenBefore               :: Text  -- ^ @"before"@ key.
  , _tokenAfter                :: Text  -- ^ @"after"@ key.
  }
  deriving (Show, Generic)

$(deriveJSON (unPrefix "_token") ''Token)
32
-- | One sentence of a CoreNLP annotation.
--
-- The JSON keys are derived from the field names through
-- @unPrefix "_sentence"@ (presumably stripping the @_sentence@ prefix; see
-- "Data.Gargantext.Utils.Prefix").  CoreNLP names the token list
-- @"tokens"@ (plural), so the field has to be '_sentenceTokens'; the
-- previous spelling, @_sentenceToken@, produced a key the server never sends
-- and made every response fail to decode.
data Sentence = Sentence
  { _sentenceIndex  :: Int      -- ^ @"index"@ key of the sentence object.
  , _sentenceTokens :: [Token]  -- ^ @"tokens"@ key of the sentence object.
  }
  deriving (Show, Generic)

$(deriveJSON (unPrefix "_sentence") ''Sentence)

{-# DEPRECATED _sentenceToken "Use _sentenceTokens instead" #-}
-- | Accessor kept under the field's former name so existing readers of the
-- token list keep compiling.
_sentenceToken :: Sentence -> [Token]
_sentenceToken = _sentenceTokens
38
-- | Settings sent to the CoreNLP server in the @properties@ query parameter.
--
-- The JSON instances are derived with the options returned by
-- @unPrefix "_properties"@ (presumably dropping the @_properties@ prefix
-- from each field name; see "Data.Gargantext.Utils.Prefix").
data Properties = Properties
  { _propertiesAnnotators   :: Text
    -- ^ Annotators to run, e.g. @"tokenize,ssplit,pos,ner"@.
  , _propertiesOutputFormat :: Text
    -- ^ Requested output format, e.g. @"json"@.
  }
  deriving (Show, Generic)

$(deriveJSON (unPrefix "_properties") ''Properties)
44
-- | Wrapper around a list of 'Sentence's; its single field is named after
-- the top-level @"sentences"@ key of the sample response quoted below.
data Sentences = Sentences
  { sentences :: [Sentence]
  }
  deriving (Show, Generic)

-- | Generic encoding with aeson's default options (what the empty instance
-- body would pick as well).
instance ToJSON Sentences where
  toJSON = genericToJSON defaultOptions
48
49 -- API Client configuration
50
51 -- Example of Client Request :
52 -- wget --post-data 'Alexandre Grothendieck is a mathematician who lived in France which is a european country. There is another sentence here.' 'localhost:9000/?properties={"annotators": "tokenize,ssplit,pos,ner", "outputFormat": "json"}' -O
53
54 -- The result is a 'Sentences' value encoded as JSON:
55 -- {"sentences":[{"index":0,"tokens":[{"index":1,"word":"Alexandre","originalText":"Alexandre","lemma":"Alexandre","characterOffsetBegin":0,"characterOffsetEnd":9,"pos":"NNP","ner":"PERSON","before":"","after":" "},{"index":2,"word":"Grothendieck","originalText":"Grothendieck","lemma":"Grothendieck","characterOffsetBegin":10,"characterOffsetEnd":22,"pos":"NNP","ner":"PERSON","before":" ","after":" "},{"index":3,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":23,"characterOffsetEnd":25,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":4,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":26,"characterOffsetEnd":27,"pos":"DT","ner":"O","before":" ","after":" "},{"index":5,"word":"mathematician","originalText":"mathematician","lemma":"mathematician","characterOffsetBegin":28,"characterOffsetEnd":41,"pos":"NN","ner":"O","before":" ","after":" "},{"index":6,"word":"who","originalText":"who","lemma":"who","characterOffsetBegin":42,"characterOffsetEnd":45,"pos":"WP","ner":"O","before":" ","after":" "},{"index":7,"word":"lived","originalText":"lived","lemma":"live","characterOffsetBegin":46,"characterOffsetEnd":51,"pos":"VBD","ner":"O","before":" ","after":" "},{"index":8,"word":"in","originalText":"in","lemma":"in","characterOffsetBegin":52,"characterOffsetEnd":54,"pos":"IN","ner":"O","before":" ","after":" "},{"index":9,"word":"France","originalText":"France","lemma":"France","characterOffsetBegin":55,"characterOffsetEnd":61,"pos":"NNP","ner":"LOCATION","before":" ","after":" "},{"index":10,"word":"which","originalText":"which","lemma":"which","characterOffsetBegin":62,"characterOffsetEnd":67,"pos":"WDT","ner":"O","before":" ","after":" "},{"index":11,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":68,"characterOffsetEnd":70,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":12,"word":"a","originalText":"a","lemma":"a","characterOffsetBegin":71,"characterOffsetEnd":72,"pos":"DT","ner":"O","before":" 
","after":" "},{"index":13,"word":"european","originalText":"european","lemma":"european","characterOffsetBegin":73,"characterOffsetEnd":81,"pos":"JJ","ner":"O","before":" ","after":" "},{"index":14,"word":"country","originalText":"country","lemma":"country","characterOffsetBegin":82,"characterOffsetEnd":89,"pos":"NN","ner":"O","before":" ","after":""},{"index":15,"word":".","originalText":".","lemma":".","characterOffsetBegin":89,"characterOffsetEnd":90,"pos":".","ner":"O","before":"","after":" "}]},{"index":1,"tokens":[{"index":1,"word":"There","originalText":"There","lemma":"there","characterOffsetBegin":91,"characterOffsetEnd":96,"pos":"EX","ner":"O","before":" ","after":" "},{"index":2,"word":"is","originalText":"is","lemma":"be","characterOffsetBegin":97,"characterOffsetEnd":99,"pos":"VBZ","ner":"O","before":" ","after":" "},{"index":3,"word":"another","originalText":"another","lemma":"another","characterOffsetBegin":100,"characterOffsetEnd":107,"pos":"DT","ner":"O","before":" ","after":" "},{"index":4,"word":"sentence","originalText":"sentence","lemma":"sentence","characterOffsetBegin":108,"characterOffsetEnd":116,"pos":"NN","ner":"O","before":" ","after":" "},{"index":5,"wo
56
57
-- | Servant description of the CoreNLP server endpoint: a @POST@ on the
-- root path.  The optional @properties@ query parameter carries the
-- JSON-encoded 'Properties', the request body is the raw text to annotate
-- (plain text, not a JSON string, so no quotes are added around it), and
-- the JSON response is decoded as 'Sentences'.
type API = ""
  :> QueryParam "properties" Properties
  :> ReqBody '[PlainText] Text
  :> Post '[JSON] Sentences

-- | Renders 'Properties' as the JSON text expected in the @properties@
-- query parameter; the generated client needs this instance to build the
-- request URL.
instance ToHttpApiData Properties where
  toQueryParam = TL.toStrict . TLE.decodeUtf8 . encode

-- | Generic decoder matching the Generic-based 'ToJSON' instance; the
-- generated client needs it to parse the server response.
instance FromJSON Sentences

-- | Ask the CoreNLP server to annotate a text.
--
-- The first argument is the optional 'Properties' sent as the @properties@
-- query parameter, the second is the text posted as the request body.  The
-- result is the decoded top-level @{"sentences": [...]}@ object, hence
-- 'Sentences' rather than a single 'Sentence'.
corenlp :: Maybe Properties -> Text -> ClientM Sentences
corenlp = client api

-- text2nlp :: Text -> ClientM

-- | Proxy selecting 'API' for the servant client generator.
api :: Proxy API
api = Proxy
67
68
69 -- corenlp t = client api
70
71 -- | URI scheme to use
72 --data Scheme =
73 -- Http -- ^ http://
74 -- | Https -- ^ https://
75 --
76 ---- | Simple data type to represent the target of HTTP requests
77 ---- for servant's automatically-generated clients.
78 --data BaseUrl = BaseUrl
79 -- { baseUrlScheme :: Scheme -- ^ URI scheme to use
80 -- , baseUrlHost :: String -- ^ host (eg "haskell.org")
81 -- , baseUrlPort :: Int -- ^ port (eg 80)
82 -- , baseUrlPath :: String -- ^ path (eg "/a/b/c")
83 -- }
84 --
85
-- | Sample input for the CoreNLP client: a sentence paired with the
-- annotator settings to apply to it.
--
-- NOTE(review): this only returns the pair; it never calls 'corenlp', so
-- running it sends no request to the server.
queries :: ClientM (Text, Properties)
queries = return (sampleText, sampleProps)
  where
    sampleText  = "Alexandre Grothendieck is free even in a sentence."
    sampleProps = Properties "tokenize,ssplit,pos,ner" "json"
91
-- | Run 'queries' with a fresh HTTP manager against @http://localhost:9000@
-- and print the outcome: the result on success, or the shown client error
-- prefixed with @"Error: "@ on failure.
run :: IO ()
run = do
  httpManager <- newManager defaultManagerSettings
  let env = ClientEnv httpManager (BaseUrl Http "localhost" 9000 "")
  outcome <- runClientM queries env
  case outcome of
    Right value  -> print value
    Left failure -> putStrLn ("Error: " ++ show failure)
100
101
102
103
104
105
106