]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Searx.hs
Merge branch '74-dev-ethercalc-csv-import' into 74-dev-frame-calc-csv-import
[gargantext.git] / src / Gargantext / API / Node / Corpus / Searx.hs
1 {-# LANGUAGE NamedFieldPuns #-}
2 {-# LANGUAGE TemplateHaskell #-}
3
4 module Gargantext.API.Node.Corpus.Searx where
5
6 import Control.Lens (view)
7 import qualified Data.Aeson as Aeson
8 import Data.Aeson.TH (deriveJSON)
9 import qualified Data.Text as T
10 import GHC.Generics (Generic)
11 import Network.HTTP.Client
12 import Network.HTTP.Client.TLS
13
14 import qualified Prelude as Prelude
15 import Protolude (encodeUtf8, Text, Either)
16 import Gargantext.Prelude
17 import Gargantext.Prelude.Config
18
19 import Gargantext.Core (Lang(..))
20 import qualified Gargantext.Core.Text.Corpus.API as API
21 import Gargantext.Core.Utils.Prefix (unPrefix)
22 import Gargantext.Database.Action.Flow (FlowCmdM)
23 import Gargantext.Database.Admin.Types.Node (CorpusId)
24 import Gargantext.Database.Prelude (hasConfig)
25
26
27 data SearxResult = SearxResult
28 { _sr_url :: Text
29 , _sr_title :: Text
30 , _sr_content :: Maybe Text
31 , _sr_engine :: Text
32 , _sr_score :: Double
33 , _sr_category :: Text
34 , _sr_pretty_url :: Text }
35 deriving (Show, Eq, Generic)
36 -- , _sr_parsed_url
37 -- , _sr_engines
38 -- , _sr_positions
39
40 $(deriveJSON (unPrefix "_sr_") ''SearxResult)
41
42 data SearxResponse = SearxResponse
43 { _srs_query :: Text
44 , _srs_number_of_results :: Int
45 , _srs_results :: [SearxResult] }
46 deriving (Show, Eq, Generic)
47 -- , _srs_answers
48 -- , _srs_corrections
49 -- , _srs_infoboxes
50 -- , _srs_suggestions :: [Text]
51 -- , _srs_unresponsive_engines :: [Text] }
52
53 $(deriveJSON (unPrefix "_srs_") ''SearxResponse)
54
55 data FetchSearxParams = FetchSearxParams
56 { _fsp_manager :: Manager
57 , _fsp_pageno :: Int
58 , _fsp_query :: Text
59 , _fsp_url :: Text
60 }
61
62 fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
63 fetchSearxPage (FetchSearxParams { _fsp_manager
64 , _fsp_pageno
65 , _fsp_query
66 , _fsp_url }) = do
67 -- searx search API:
68 -- https://searx.github.io/searx/dev/search_api.html?highlight=json
69 req <- parseRequest $ T.unpack _fsp_url
70 let request = urlEncodedBody
71 [ --("category_general", "1")
72 ("q", encodeUtf8 _fsp_query)
73 , ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
74 --, ("time_range", "None")
75 --, ("language", "en-US") -- TODO
76 , ("format", "json")
77 ] req
78 res <- httpLbs request _fsp_manager
79 let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
80 pure dec
81
82 triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
83 => CorpusId
84 -> API.Query
85 -> Lang
86 -> m ()
87
88 triggerSearxSearch cid q l = do
89 printDebug "[triggerSearxSearch] cid" cid
90 printDebug "[triggerSearxSearch] q" q
91 printDebug "[triggerSearxSearch] l" l
92 cfg <- view hasConfig
93 let surl = _gc_frame_searx_url cfg
94 printDebug "[triggerSearxSearch] surl" surl
95
96 manager <- liftBase $ newManager tlsManagerSettings
97 res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_manager = manager
98 , _fsp_pageno = 1
99 , _fsp_query = q
100 , _fsp_url = surl }
101
102 printDebug "[triggerSearxSearch] res" res
103
104 pure ()