]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Utils/SpacyNLP.hs
Merge remote-tracking branch 'origin/184-dev-add-support-for-multiple-languages-in...
[gargantext.git] / src / Gargantext / Utils / SpacyNLP.hs
1 {-|
2 Module : Gargantext.Utils.SpacyNLP
3 Description : John Snow NLP API connexion
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Spacy ecosystem: https://github.com/explosion/spaCy
11
12 Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
13
14 -}
15
16 {-# LANGUAGE TemplateHaskell #-}
17
18 module Gargantext.Utils.SpacyNLP where
19
20 import Control.Lens
21 import Data.Aeson (encode)
22 import Data.Aeson.TH (deriveJSON)
23 import Data.Text hiding (map, group, filter, concat, zip)
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
26 import Gargantext.Core.Types (POS(..), NER(..))
27 import Gargantext.Core.Utils.Prefix (unPrefix)
28 import Gargantext.Prelude
29 import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
30 import Network.URI (URI(..))
31
32
33 data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
34 deriving (Show)
35
36 data SpacyText = SpacyText { _spacy_text :: !Text
37 , _spacy_tags :: ![SpacyTags]
38 } deriving (Show)
39 data SpacyTags =
40 SpacyTags { _spacyTags_text :: !Text
41 , _spacyTags_text_with_ws :: !Text
42 , _spacyTags_whitespace :: !Text
43 , _spacyTags_head :: !Text
44 , _spacyTags_head_index :: !Int
45 , _spacyTags_left_edge :: !Text
46 , _spacyTags_right_edge :: !Text
47 , _spacyTags_index :: Int
48 , _spacyTags_ent_type :: !NER
49 , _spacyTags_ent_iob :: !Text
50 , _spacyTags_lemma :: !Text
51 , _spacyTags_normalized :: !Text
52 , _spacyTags_shape :: !Text
53 , _spacyTags_prefix :: !Text
54 , _spacyTags_suffix :: !Text
55 , _spacyTags_is_alpha :: Bool
56 , _spacyTags_is_ascii :: Bool
57 , _spacyTags_is_digit :: Bool
58 , _spacyTags_is_title :: Bool
59 , _spacyTags_is_punct :: Bool
60 , _spacyTags_is_left_punct :: Bool
61 , _spacyTags_is_right_punct :: Bool
62 , _spacyTags_is_space :: Bool
63 , _spacyTags_is_bracket :: Bool
64 , _spacyTags_is_quote :: Bool
65 , _spacyTags_is_currency :: Bool
66 , _spacyTags_like_url :: Bool
67 , _spacyTags_like_num :: Bool
68 , _spacyTags_like_email :: Bool
69 , _spacyTags_is_oov :: Bool
70 , _spacyTags_is_stop :: Bool
71 , _spacyTags_pos :: POS
72 , _spacyTags_tag :: POS
73 , _spacyTags_dep :: !Text
74 , _spacyTags_lang :: !Text
75 , _spacyTags_prob :: !Int
76 , _spacyTags_char_offset :: !Int
77 } deriving (Show)
78
79
80 data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
81 deriving (Show)
82
83 spacyRequest :: URI -> Text -> IO SpacyData
84 spacyRequest uri txt = do
85 req <- parseRequest $ "POST " <> show (uri { uriPath = "/pos" })
86 let request = setRequestBodyLBS (encode $ SpacyRequest txt) req
87 result <- httpJSON request :: IO (Response SpacyData)
88 pure $ getResponseBody result
89
90
91 -- Instances
92 deriveJSON (unPrefix "_spacy_") ''SpacyData
93 deriveJSON (unPrefix "_spacy_") ''SpacyText
94 deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
95 deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
96
97 makeLenses ''SpacyData
98 makeLenses ''SpacyText
99 makeLenses ''SpacyTags
100 makeLenses ''SpacyRequest
101
102 ----------------------------------------------------------------
103 spacyTagsToToken :: SpacyTags -> Token
104 spacyTagsToToken st = Token (st ^. spacyTags_index)
105 (st ^. spacyTags_normalized)
106 (st ^. spacyTags_text)
107 (st ^. spacyTags_lemma)
108 (st ^. spacyTags_head_index)
109 (st ^. spacyTags_char_offset)
110 (Just $ st ^. spacyTags_pos)
111 (Just $ st ^. spacyTags_ent_type)
112 (Just $ st ^. spacyTags_prefix)
113 (Just $ st ^. spacyTags_suffix)
114
115 spacyDataToPosSentences :: SpacyData -> PosSentences
116 spacyDataToPosSentences (SpacyData ds) = PosSentences
117 $ map (\(i, ts) -> Sentence i ts)
118 $ zip [1..]
119 $ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
120
121 -----------------------------------------------------------------
122
123 nlp :: URI -> Lang -> Text -> IO PosSentences
124 nlp uri FR txt = spacyDataToPosSentences <$> spacyRequest uri txt
125 nlp _ _ _ = panic "Make sure you have the right model for your lang for spacy Server"
126 -- nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
127 -- nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"