]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Utils/SpacyNLP.hs
Doc/Comments
[gargantext.git] / src / Gargantext / Utils / SpacyNLP.hs
1 {-|
2 Module : Gargantext.Utils.SpacyNLP
3 Description : John Snow NLP API connexion
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Spacy ecosystem: https://github.com/explosion/spaCy
11
12 Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
13
14 -}
15
16 {-# LANGUAGE TemplateHaskell #-}
17
18 module Gargantext.Utils.SpacyNLP where
19
20 import Control.Lens
21 import Data.Aeson (encode)
22 import Data.Aeson.TH (deriveJSON)
23 import Data.Text hiding (map, group, filter, concat, zip)
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
26 import Gargantext.Core.Types (POS(..), NER(..))
27 import Gargantext.Core.Utils.Prefix (unPrefix)
28 import Gargantext.Prelude
29 import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
30
31
32 data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
33 deriving (Show)
34
35 data SpacyText = SpacyText { _spacy_text :: !Text
36 , _spacy_tags :: ![SpacyTags]
37 } deriving (Show)
38 data SpacyTags =
39 SpacyTags { _spacyTags_text :: !Text
40 , _spacyTags_text_with_ws :: !Text
41 , _spacyTags_whitespace :: !Text
42 , _spacyTags_head :: !Text
43 , _spacyTags_head_index :: !Int
44 , _spacyTags_left_edge :: !Text
45 , _spacyTags_right_edge :: !Text
46 , _spacyTags_index :: Int
47 , _spacyTags_ent_type :: !NER
48 , _spacyTags_ent_iob :: !Text
49 , _spacyTags_lemma :: !Text
50 , _spacyTags_normalized :: !Text
51 , _spacyTags_shape :: !Text
52 , _spacyTags_prefix :: !Text
53 , _spacyTags_suffix :: !Text
54 , _spacyTags_is_alpha :: Bool
55 , _spacyTags_is_ascii :: Bool
56 , _spacyTags_is_digit :: Bool
57 , _spacyTags_is_title :: Bool
58 , _spacyTags_is_punct :: Bool
59 , _spacyTags_is_left_punct :: Bool
60 , _spacyTags_is_right_punct :: Bool
61 , _spacyTags_is_space :: Bool
62 , _spacyTags_is_bracket :: Bool
63 , _spacyTags_is_quote :: Bool
64 , _spacyTags_is_currency :: Bool
65 , _spacyTags_like_url :: Bool
66 , _spacyTags_like_num :: Bool
67 , _spacyTags_like_email :: Bool
68 , _spacyTags_is_oov :: Bool
69 , _spacyTags_is_stop :: Bool
70 , _spacyTags_pos :: POS
71 , _spacyTags_tag :: POS
72 , _spacyTags_dep :: !Text
73 , _spacyTags_lang :: !Text
74 , _spacyTags_prob :: !Int
75 , _spacyTags_char_offset :: !Int
76 } deriving (Show)
77
78
79 data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
80 deriving (Show)
81
82 spacyRequest :: Text -> IO SpacyData
83 spacyRequest txt = do
84 url <- parseRequest $ unpack "POST http://localhost:8001/pos"
85 let request = setRequestBodyLBS (encode $ SpacyRequest txt) url
86 result <- httpJSON request :: IO (Response SpacyData)
87 pure $ getResponseBody result
88
89
90 -- Instances
91 deriveJSON (unPrefix "_spacy_") ''SpacyData
92 deriveJSON (unPrefix "_spacy_") ''SpacyText
93 deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
94 deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
95
96 makeLenses ''SpacyData
97 makeLenses ''SpacyText
98 makeLenses ''SpacyTags
99 makeLenses ''SpacyRequest
100
101 ----------------------------------------------------------------
102 spacyTagsToToken :: SpacyTags -> Token
103 spacyTagsToToken st = Token (st ^. spacyTags_index)
104 (st ^. spacyTags_normalized)
105 (st ^. spacyTags_text)
106 (st ^. spacyTags_lemma)
107 (st ^. spacyTags_head_index)
108 (st ^. spacyTags_char_offset)
109 (Just $ st ^. spacyTags_pos)
110 (Just $ st ^. spacyTags_ent_type)
111 (Just $ st ^. spacyTags_prefix)
112 (Just $ st ^. spacyTags_suffix)
113
114 spacyDataToPosSentences :: SpacyData -> PosSentences
115 spacyDataToPosSentences (SpacyData ds) = PosSentences
116 $ map (\(i, ts) -> Sentence i ts)
117 $ zip [1..]
118 $ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
119
120 -----------------------------------------------------------------
121
122 nlp :: Lang -> Text -> IO PosSentences
123 nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
124 nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"
125
126