2 Module : Gargantext.Utils.SpacyNLP
3 Description : John Snow NLP API connexion
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Spacy ecosystem: https://github.com/explosion/spaCy
12 Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
16 {-# LANGUAGE TemplateHaskell #-}
18 module Gargantext.Utils.SpacyNLP where
21 import Data.Aeson (encode)
22 import Data.Aeson.TH (deriveJSON)
23 import Data.Text hiding (map, group, filter, concat, zip)
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
26 import Gargantext.Core.Types (POS(..), NER(..))
27 import Gargantext.Core.Utils.Prefix (unPrefix)
28 import Gargantext.Prelude
29 import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
32 data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
35 data SpacyText = SpacyText { _spacy_text :: !Text
36 , _spacy_tags :: ![SpacyTags]
39 SpacyTags { _spacyTags_text :: !Text
40 , _spacyTags_text_with_ws :: !Text
41 , _spacyTags_whitespace :: !Text
42 , _spacyTags_head :: !Text
43 , _spacyTags_head_index :: !Int
44 , _spacyTags_left_edge :: !Text
45 , _spacyTags_right_edge :: !Text
46 , _spacyTags_index :: Int
47 , _spacyTags_ent_type :: !NER
48 , _spacyTags_ent_iob :: !Text
49 , _spacyTags_lemma :: !Text
50 , _spacyTags_normalized :: !Text
51 , _spacyTags_shape :: !Text
52 , _spacyTags_prefix :: !Text
53 , _spacyTags_suffix :: !Text
54 , _spacyTags_is_alpha :: Bool
55 , _spacyTags_is_ascii :: Bool
56 , _spacyTags_is_digit :: Bool
57 , _spacyTags_is_title :: Bool
58 , _spacyTags_is_punct :: Bool
59 , _spacyTags_is_left_punct :: Bool
60 , _spacyTags_is_right_punct :: Bool
61 , _spacyTags_is_space :: Bool
62 , _spacyTags_is_bracket :: Bool
63 , _spacyTags_is_quote :: Bool
64 , _spacyTags_is_currency :: Bool
65 , _spacyTags_like_url :: Bool
66 , _spacyTags_like_num :: Bool
67 , _spacyTags_like_email :: Bool
68 , _spacyTags_is_oov :: Bool
69 , _spacyTags_is_stop :: Bool
70 , _spacyTags_pos :: POS
71 , _spacyTags_tag :: POS
72 , _spacyTags_dep :: !Text
73 , _spacyTags_lang :: !Text
74 , _spacyTags_prob :: !Int
75 , _spacyTags_char_offset :: !Int
79 data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
82 spacyRequest :: Text -> IO SpacyData
84 url <- parseRequest $ unpack "POST http://localhost:8001/pos"
85 let request = setRequestBodyLBS (encode $ SpacyRequest txt) url
86 result <- httpJSON request :: IO (Response SpacyData)
87 pure $ getResponseBody result
91 deriveJSON (unPrefix "_spacy_") ''SpacyData
92 deriveJSON (unPrefix "_spacy_") ''SpacyText
93 deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
94 deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
96 makeLenses ''SpacyData
97 makeLenses ''SpacyText
98 makeLenses ''SpacyTags
99 makeLenses ''SpacyRequest
101 ----------------------------------------------------------------
102 spacyTagsToToken :: SpacyTags -> Token
103 spacyTagsToToken st = Token (st ^. spacyTags_index)
104 (st ^. spacyTags_normalized)
105 (st ^. spacyTags_text)
106 (st ^. spacyTags_lemma)
107 (st ^. spacyTags_head_index)
108 (st ^. spacyTags_char_offset)
109 (Just $ st ^. spacyTags_pos)
110 (Just $ st ^. spacyTags_ent_type)
111 (Just $ st ^. spacyTags_prefix)
112 (Just $ st ^. spacyTags_suffix)
114 spacyDataToPosSentences :: SpacyData -> PosSentences
115 spacyDataToPosSentences (SpacyData ds) = PosSentences
116 $ map (\(i, ts) -> Sentence i ts)
118 $ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
120 -----------------------------------------------------------------
122 nlp :: Lang -> Text -> IO PosSentences
123 nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
124 nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"