{-|
Module      : Gargantext.Core.Text.Corpus.API.OpenAlex
Description : OpenAlex API connection
Copyright   : (c) CNRS, 2023
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
-}
10 module Gargantext.Core.Text.Corpus.API.OpenAlex where
13 import qualified Data.Text as T
14 import Gargantext.Core (Lang, toISO639Lang)
15 import Gargantext.Core.Text.Corpus.Query as Corpus
16 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
18 import qualified OpenAlex as OA
19 import qualified OpenAlex.Types as OA
20 import Servant.Client (ClientError)
27 -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Fetch OpenAlex works matching the raw query @q@ and expose them as a
-- stream of 'HyperdataDocument's.
--
-- * @_email@ is accepted but not used by this implementation.
-- * When 'toISO639Lang' yields a code for @lang@, a @language:<code>@
--   filter is sent along with the query; otherwise no filter is sent.
-- * At most @mLimit@ documents are taken from the stream (10000 when
--   @mLimit@ is 'Nothing').
--
-- The first component of the successful result is passed through unchanged
-- from 'OA.fetchWorksC'; it is not adjusted to the applied limit.
get _email q lang mLimit = do
  let maxDocs = getLimit (fromMaybe 10000 mLimit)
      langFilter = fmap ("language:" <>) (toISO639Lang lang)
      -- Cap the stream first, then convert each work to a document.
      convert (total, works) = (total, works .| takeC maxDocs .| mapC toDoc)
  result <- OA.fetchWorksC Nothing langFilter (Just (Corpus.getRawQuery q))
  pure (fmap convert result)
-- | Convert an OpenAlex 'OA.Work' into a 'HyperdataDocument'.
--
-- The work's fields are brought into scope with a record wildcard. The
-- document is tagged with the database name @"OpenAlex"@, and the same
-- identifier is used for both '_hd_uniqId' and '_hd_uniqIdBdd'. Month, day,
-- hour, minute and second of publication are not filled in yet.
toDoc :: OA.Work -> HyperdataDocument
toDoc (OA.Work { .. } ) =
  -- NOTE(review): @id@ below is presumably the work's own field bound by the
  -- record wildcard (shadowing the Prelude identity function) - confirm.
  HyperdataDocument { _hd_bdd = Just "OpenAlex"
                    , _hd_uniqId = Just id
                    , _hd_uniqIdBdd = Just id
                    , _hd_page = firstPage biblio
                    , _hd_authors = authors authorships
                    , _hd_institutes = institutes authorships
                    , _hd_abstract = Just abstract_reconstructed
                    -- NOTE(review): the date is rendered with 'show'; confirm
                    -- that this is the textual format expected downstream.
                    , _hd_publication_date = Just $ show publication_date
                    , _hd_publication_year = Just publication_year
                    , _hd_publication_month = Nothing -- TODO
                    , _hd_publication_day = Nothing -- TODO
                    , _hd_publication_hour = Nothing -- TODO
                    , _hd_publication_minute = Nothing -- TODO
                    , _hd_publication_second = Nothing -- TODO
                    , _hd_language_iso2 = language }
  where
    -- First page of the work: 'Nothing' when the field is absent or when
    -- its text does not parse as an 'Int'.
    firstPage :: OA.Biblio -> Maybe Int
    firstPage OA.Biblio { first_page } = first_page >>= readMaybe . T.unpack

    -- Author display names joined with ", ". Authorships whose author has
    -- no display name are skipped. An empty authorship list gives 'Nothing'
    -- (rather than @Just ""@), matching 'institutes'.
    authors :: [OA.Authorship] -> Maybe Text
    authors [] = Nothing
    authors aus = Just $ T.intercalate ", " $ catMaybes (authorName <$> aus)
      where
        authorName :: OA.Authorship -> Maybe Text
        authorName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn

    -- One entry per authorship, entries separated by ", ". Inside an entry
    -- the institution names are first joined with ", " and then every ", "
    -- is rewritten to " - ", so that ", " only ever separates authorships
    -- (this also rewrites a ", " occurring inside an institution name).
    institutes :: [OA.Authorship] -> Maybe Text
    institutes [] = Nothing
    institutes aus = Just $ T.intercalate ", " (perAuthorship <$> aus)
      where
        perAuthorship :: OA.Authorship -> Text
        perAuthorship = T.replace ", " " - " . institutionNames

        institutionNames :: OA.Authorship -> Text
        institutionNames OA.Authorship { institutions } = T.intercalate ", " $ institutionName <$> institutions

        institutionName :: OA.DehydratedInstitution -> Text
        institutionName OA.DehydratedInstitution { display_name = dn } = dn

    -- Display name of the source of the work's primary location; 'Nothing'
    -- when the work has no primary location or that location has no source.
    -- NOTE(review): no field assignment visible in this record refers to
    -- this binding - confirm whether one is missing.
    source = primary_location >>= getSource
      where
        getSource OA.Location { source = s } = getSourceDisplayName <$> s
        getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn