Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
[openalex] add support for language filter in queries
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / OpenAlex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.OpenAlex
3 Description : OpenAlex API connection
4 Copyright : (c) CNRS, 2023
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10 module Gargantext.Core.Text.Corpus.API.OpenAlex where
11
12 import Conduit
13 import qualified Data.Text as T
14 import Gargantext.Core (Lang, toISO639Lang)
15 import Gargantext.Core.Text.Corpus.Query as Corpus
16 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
17 import Protolude
18 import qualified OpenAlex as OA
19 import qualified OpenAlex.Types as OA
20 import Servant.Client (ClientError)
21
22
-- | Fetch works from OpenAlex for a raw query and expose them as a stream
-- of 'HyperdataDocument's.
--
-- The request goes through 'OA.fetchWorksC' with the raw query text and,
-- when 'toISO639Lang' yields a code for the given language, a
-- @language:\<code\>@ filter. On success the document stream is cut off
-- after the requested limit (10000 when no limit is supplied) and every
-- work is converted with 'toDoc'. The first component of the result is
-- passed through untouched from 'OA.fetchWorksC' (presumably the total
-- number of matching works -- confirm against the OpenAlex client).
--
-- The first argument (an e-mail address) is currently ignored.
get :: Text
    -> Corpus.RawQuery
    -> Lang
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _email q lang mLimit =
  fmap convert <$> OA.fetchWorksC Nothing languageFilter (Just rawQuery)
  where
    -- Upper bound on the number of documents pulled from the stream.
    maxDocs = getLimit (fromMaybe 10000 mLimit)

    -- Only present when the language maps to an ISO 639 code.
    languageFilter = ("language:" <>) <$> toISO639Lang lang

    rawQuery = Corpus.getRawQuery q

    -- Keep the first component as returned, truncate and convert the stream.
    convert (total, worksC) = (total, worksC .| takeC maxDocs .| mapC toDoc)
33
-- | Convert an OpenAlex 'OA.Work' into a 'HyperdataDocument'.
--
-- The document is tagged with the @"OpenAlex"@ database name and the work's
-- identifier is stored in both '_hd_uniqId' and '_hd_uniqIdBdd'. Of the
-- date fields only the publication year and the 'show'n publication date
-- are filled in; month, day, hour, minute and second are left as 'Nothing'.
toDoc :: OA.Work -> HyperdataDocument
toDoc (OA.Work { .. } ) =
  HyperdataDocument { _hd_bdd = Just "OpenAlex"
                    , _hd_doi = doi
                    , _hd_url = url
                    , _hd_uniqId = Just id
                    , _hd_uniqIdBdd = Just id
                    , _hd_page = firstPage biblio
                    , _hd_title = title
                    , _hd_authors = authors authorships
                    , _hd_institutes = institutes authorships
                    , _hd_source = source
                    , _hd_abstract = Just abstract_reconstructed
                    , _hd_publication_date = Just $ show publication_date
                    , _hd_publication_year = Just publication_year
                    , _hd_publication_month = Nothing  -- TODO
                    , _hd_publication_day = Nothing  -- TODO
                    , _hd_publication_hour = Nothing  -- TODO
                    , _hd_publication_minute = Nothing  -- TODO
                    , _hd_publication_second = Nothing  -- TODO
                    , _hd_language_iso2 = language }
  where
    -- First page of the work, when present and parseable as an 'Int'.
    firstPage :: OA.Biblio -> Maybe Int
    firstPage OA.Biblio { first_page } = readMaybe . T.unpack =<< first_page

    -- Comma-separated display names of the authors; authorships whose
    -- author has no display name are skipped. 'Nothing' only when there
    -- are no authorships at all.
    authors :: [OA.Authorship] -> Maybe Text
    authors [] = Nothing
    authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
      where
        getDisplayName :: OA.Authorship -> Maybe Text
        getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn

    -- One entry per authorship, joined with ", ". Inside an entry every
    -- ", " is rewritten to " - " (presumably so that ", " only ever
    -- separates authorships). 'Nothing' only when there are no authorships.
    institutes :: [OA.Authorship] -> Maybe Text
    institutes [] = Nothing
    institutes aus = Just $ T.intercalate ", " (institutesOf <$> aus)
      where
        institutesOf :: OA.Authorship -> Text
        institutesOf OA.Authorship { institutions } =
          T.replace ", " " - " $ T.intercalate ", " (getDisplayName <$> institutions)

        getDisplayName :: OA.DehydratedInstitution -> Text
        getDisplayName OA.DehydratedInstitution { display_name = dn } = dn

    -- Display name of the primary location's source, when both exist.
    source :: Maybe Text
    source = primary_location >>= getSource
      where
        getSource OA.Location { source = s } = getSourceDisplayName <$> s
        getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn