]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Searx.hs
[searx] some more work on triggerSearxSearch
[gargantext.git] / src / Gargantext / API / Node / Corpus / Searx.hs
1 {-# LANGUAGE TemplateHaskell #-}
2
3 module Gargantext.API.Node.Corpus.Searx where
4
5
6
7 import Control.Lens (view)
8 import qualified Data.Aeson as Aeson
9 import Data.Aeson.TH (deriveJSON)
10 import qualified Data.Text as T
11 import GHC.Generics (Generic)
12 import Network.HTTP.Client
13 import Network.HTTP.Client.TLS
14
15 import qualified Prelude as Prelude
16 import Protolude (encodeUtf8, Text, Either)
17 import Gargantext.Prelude
18 import Gargantext.Prelude.Config
19
20 import Gargantext.Core (Lang(..))
21 import qualified Gargantext.Core.Text.Corpus.API as API
22 import Gargantext.Core.Utils.Prefix (unPrefix)
23 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
24 import Gargantext.Database.Admin.Types.Node (CorpusId)
25 import Gargantext.Database.Prelude (hasConfig)
26 import Gargantext.Database.Query.Table.Node (defaultList)
27
28
29 langToSearx :: Lang -> Text
30 langToSearx EN = "en-US"
31 langToSearx FR = "fr-FR"
32 langToSearx All = "en-US"
33
34 data SearxResult = SearxResult
35 { _sr_url :: Text
36 , _sr_title :: Text
37 , _sr_content :: Maybe Text
38 , _sr_engine :: Text
39 , _sr_score :: Double
40 , _sr_category :: Text
41 , _sr_pretty_url :: Text }
42 deriving (Show, Eq, Generic)
43 -- , _sr_parsed_url
44 -- , _sr_engines
45 -- , _sr_positions
46
47 $(deriveJSON (unPrefix "_sr_") ''SearxResult)
48
49 data SearxResponse = SearxResponse
50 { _srs_query :: Text
51 , _srs_number_of_results :: Int
52 , _srs_results :: [SearxResult] }
53 deriving (Show, Eq, Generic)
54 -- , _srs_answers
55 -- , _srs_corrections
56 -- , _srs_infoboxes
57 -- , _srs_suggestions :: [Text]
58 -- , _srs_unresponsive_engines :: [Text] }
59
60 $(deriveJSON (unPrefix "_srs_") ''SearxResponse)
61
62 data FetchSearxParams = FetchSearxParams
63 { _fsp_language :: Lang
64 , _fsp_manager :: Manager
65 , _fsp_pageno :: Int
66 , _fsp_query :: Text
67 , _fsp_url :: Text
68 }
69
70 fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
71 fetchSearxPage (FetchSearxParams { _fsp_language
72 , _fsp_manager
73 , _fsp_pageno
74 , _fsp_query
75 , _fsp_url }) = do
76 -- searx search API:
77 -- https://searx.github.io/searx/dev/search_api.html?highlight=json
78 req <- parseRequest $ T.unpack _fsp_url
79 let request = urlEncodedBody
80 [ --("category_general", "1")
81 ("q", encodeUtf8 _fsp_query)
82 , ("categories", "news") -- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
83 , ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
84 --, ("time_range", "None")
85 , ("language", encodeUtf8 $ langToSearx _fsp_language)
86 , ("format", "json")
87 ] req
88 res <- httpLbs request _fsp_manager
89 let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
90 pure dec
91
92 -- TODO Make an async task out of this?
93 triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
94 => CorpusId
95 -> API.Query
96 -> Lang
97 -> m ()
98 triggerSearxSearch cId q l = do
99 printDebug "[triggerSearxSearch] cId" cId
100 printDebug "[triggerSearxSearch] q" q
101 printDebug "[triggerSearxSearch] l" l
102 cfg <- view hasConfig
103 let surl = _gc_frame_searx_url cfg
104 printDebug "[triggerSearxSearch] surl" surl
105 listId <- defaultList cId
106 printDebug "[triggerSearxSearch] listId" listId
107
108 manager <- liftBase $ newManager tlsManagerSettings
109 res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
110 , _fsp_manager = manager
111 , _fsp_pageno = 1
112 , _fsp_query = q
113 , _fsp_url = surl }
114
115 printDebug "[triggerSearxSearch] res" res
116
117 pure ()