]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/Corpus/Searx.hs
[flow] add better progress report to flow corpus
[gargantext.git] / src / Gargantext / API / Node / Corpus / Searx.hs
1 {-# LANGUAGE TemplateHaskell #-}
2
3 module Gargantext.API.Node.Corpus.Searx where
4
5
6
7 import Control.Lens (view)
8 import qualified Data.Aeson as Aeson
9 import Data.Aeson.TH (deriveJSON)
10 import qualified Data.Text as T
11 import GHC.Generics (Generic)
12 import Network.HTTP.Client
13 import Network.HTTP.Client.TLS
14
15 import qualified Prelude as Prelude
16 import Protolude (encodeUtf8, Text, Either)
17 import Gargantext.Prelude
18 import Gargantext.Prelude.Config
19
20 import Gargantext.Core (Lang(..))
21 import qualified Gargantext.Core.Text.Corpus.API as API
22 import Gargantext.Core.Utils.Prefix (unPrefix)
23 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
24 import Gargantext.Database.Admin.Types.Node (CorpusId)
25 import Gargantext.Database.Prelude (hasConfig)
26
27
28 data SearxResult = SearxResult
29 { _sr_url :: Text
30 , _sr_title :: Text
31 , _sr_content :: Maybe Text
32 , _sr_engine :: Text
33 , _sr_score :: Double
34 , _sr_category :: Text
35 , _sr_pretty_url :: Text }
36 deriving (Show, Eq, Generic)
37 -- , _sr_parsed_url
38 -- , _sr_engines
39 -- , _sr_positions
40
41 $(deriveJSON (unPrefix "_sr_") ''SearxResult)
42
43 data SearxResponse = SearxResponse
44 { _srs_query :: Text
45 , _srs_number_of_results :: Int
46 , _srs_results :: [SearxResult] }
47 deriving (Show, Eq, Generic)
48 -- , _srs_answers
49 -- , _srs_corrections
50 -- , _srs_infoboxes
51 -- , _srs_suggestions :: [Text]
52 -- , _srs_unresponsive_engines :: [Text] }
53
54 $(deriveJSON (unPrefix "_srs_") ''SearxResponse)
55
56 data FetchSearxParams = FetchSearxParams
57 { _fsp_language :: Lang
58 , _fsp_manager :: Manager
59 , _fsp_pageno :: Int
60 , _fsp_query :: Text
61 , _fsp_url :: Text
62 }
63
64 fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
65 fetchSearxPage (FetchSearxParams { _fsp_language
66 , _fsp_manager
67 , _fsp_pageno
68 , _fsp_query
69 , _fsp_url }) = do
70 -- searx search API:
71 -- https://searx.github.io/searx/dev/search_api.html?highlight=json
72 req <- parseRequest $ T.unpack _fsp_url
73 let request = urlEncodedBody
74 [ --("category_general", "1")
75 ("q", encodeUtf8 _fsp_query)
76 , ("categories", "news") -- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
77 , ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
78 --, ("time_range", "None")
79 , ("language", encodeUtf8 $ T.pack $ show _fsp_language)
80 , ("format", "json")
81 ] req
82 res <- httpLbs request _fsp_manager
83 let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
84 pure dec
85
86 triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
87 => CorpusId
88 -> API.Query
89 -> Lang
90 -> m ()
91
92 triggerSearxSearch cid q l = do
93 printDebug "[triggerSearxSearch] cid" cid
94 printDebug "[triggerSearxSearch] q" q
95 printDebug "[triggerSearxSearch] l" l
96 cfg <- view hasConfig
97 let surl = _gc_frame_searx_url cfg
98 printDebug "[triggerSearxSearch] surl" surl
99
100 manager <- liftBase $ newManager tlsManagerSettings
101 res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
102 , _fsp_manager = manager
103 , _fsp_pageno = 1
104 , _fsp_query = q
105 , _fsp_url = surl }
106
107 printDebug "[triggerSearxSearch] res" res
108
109 pure ()