]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Searx.hs
[framewrite] better line parsing
[gargantext.git] / src / Gargantext / API / Node / Corpus / Searx.hs
1 {-# LANGUAGE NamedFieldPuns #-}
2 {-# LANGUAGE TemplateHaskell #-}
3
4 module Gargantext.API.Node.Corpus.Searx where
5
6
7
8 import Control.Lens (view)
9 import qualified Data.Aeson as Aeson
10 import Data.Aeson.TH (deriveJSON)
11 import qualified Data.Text as T
12 import GHC.Generics (Generic)
13 import Network.HTTP.Client
14 import Network.HTTP.Client.TLS
15
16 import qualified Prelude as Prelude
17 import Protolude (encodeUtf8, Text, Either)
18 import Gargantext.Prelude
19 import Gargantext.Prelude.Config
20
21 import Gargantext.Core (Lang(..))
22 import qualified Gargantext.Core.Text.Corpus.API as API
23 import Gargantext.Core.Utils.Prefix (unPrefix)
24 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
25 import Gargantext.Database.Admin.Types.Node (CorpusId)
26 import Gargantext.Database.Prelude (hasConfig)
27
28
29 data SearxResult = SearxResult
30 { _sr_url :: Text
31 , _sr_title :: Text
32 , _sr_content :: Maybe Text
33 , _sr_engine :: Text
34 , _sr_score :: Double
35 , _sr_category :: Text
36 , _sr_pretty_url :: Text }
37 deriving (Show, Eq, Generic)
38 -- , _sr_parsed_url
39 -- , _sr_engines
40 -- , _sr_positions
41
42 $(deriveJSON (unPrefix "_sr_") ''SearxResult)
43
44 data SearxResponse = SearxResponse
45 { _srs_query :: Text
46 , _srs_number_of_results :: Int
47 , _srs_results :: [SearxResult] }
48 deriving (Show, Eq, Generic)
49 -- , _srs_answers
50 -- , _srs_corrections
51 -- , _srs_infoboxes
52 -- , _srs_suggestions :: [Text]
53 -- , _srs_unresponsive_engines :: [Text] }
54
55 $(deriveJSON (unPrefix "_srs_") ''SearxResponse)
56
57 data FetchSearxParams = FetchSearxParams
58 { _fsp_language :: Lang
59 , _fsp_manager :: Manager
60 , _fsp_pageno :: Int
61 , _fsp_query :: Text
62 , _fsp_url :: Text
63 }
64
65 fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
66 fetchSearxPage (FetchSearxParams { _fsp_language
67 , _fsp_manager
68 , _fsp_pageno
69 , _fsp_query
70 , _fsp_url }) = do
71 -- searx search API:
72 -- https://searx.github.io/searx/dev/search_api.html?highlight=json
73 req <- parseRequest $ T.unpack _fsp_url
74 let request = urlEncodedBody
75 [ --("category_general", "1")
76 ("q", encodeUtf8 _fsp_query)
77 , ("categories", "news") -- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
78 , ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
79 --, ("time_range", "None")
80 , ("language", encodeUtf8 $ T.pack $ show _fsp_language)
81 , ("format", "json")
82 ] req
83 res <- httpLbs request _fsp_manager
84 let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
85 pure dec
86
87 triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
88 => CorpusId
89 -> API.Query
90 -> Lang
91 -> m ()
92
93 triggerSearxSearch cid q l = do
94 printDebug "[triggerSearxSearch] cid" cid
95 printDebug "[triggerSearxSearch] q" q
96 printDebug "[triggerSearxSearch] l" l
97 cfg <- view hasConfig
98 let surl = _gc_frame_searx_url cfg
99 printDebug "[triggerSearxSearch] surl" surl
100
101 manager <- liftBase $ newManager tlsManagerSettings
102 res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
103 , _fsp_manager = manager
104 , _fsp_pageno = 1
105 , _fsp_query = q
106 , _fsp_url = surl }
107
108 printDebug "[triggerSearxSearch] res" res
109
110 pure ()