2 Module : Gargantext.Core.Text.Corpus.API.OpenAlex
3 Description : OpenAlex API connection
4 Copyright : (c) CNRS, 2023
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 module Gargantext.Core.Text.Corpus.API.OpenAlex where
13 import qualified Data.Text as T
14 import Gargantext.Core.Text.Corpus.Query as Corpus
15 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
17 import qualified OpenAlex as OA
18 import qualified OpenAlex.Types as OA
19 import Servant.Client (ClientError)
25 -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
-- Call 'OA.fetchWorksC' with the raw text of the query 'q' as its third
-- argument; the first two arguments are left as 'Nothing'.
27 eRes <- OA.fetchWorksC Nothing Nothing $ Just $ Corpus.getRawQuery q
-- 'eRes' is an 'Either': a 'Left' is returned unchanged by '<$>'. On 'Right',
-- the first tuple component is kept as is and the conduit is piped through
-- 'takeC 1000' and then 'mapC toDoc'.
-- NOTE(review): the 1000 bound is hard-coded on this line - confirm that it is
-- intended rather than something that should come from the caller.
28 pure $ (\(len, docsC) -> (len, docsC .| takeC 1000 .| mapC toDoc)) <$> eRes
-- | Build a 'HyperdataDocument' from an OpenAlex 'OA.Work'.
--
-- The @{ .. }@ pattern brings the fields of the work into scope; names used
-- below that are not defined in this file ('id', 'biblio', 'authorships',
-- 'abstract_reconstructed', 'publication_date', 'publication_year',
-- 'language') are presumably such fields - confirm against "OpenAlex.Types".
30 toDoc :: OA.Work -> HyperdataDocument
31 toDoc (OA.Work { .. } ) =
-- The originating database is always recorded as the literal "OpenAlex".
32 HyperdataDocument { _hd_bdd = Just "OpenAlex"
-- The same 'id' value is stored in both unique-id fields.
35 , _hd_uniqId = Just id
36 , _hd_uniqIdBdd = Just id
37 , _hd_page = firstPage biblio
-- Authors and institutes are both derived from the same 'authorships' list.
39 , _hd_authors = authors authorships
40 , _hd_institutes = institutes authorships
42 , _hd_abstract = Just abstract_reconstructed
-- The date is stored in its 'show' rendering, not in a dedicated format.
43 , _hd_publication_date = Just $ show publication_date
44 , _hd_publication_year = Just $ publication_year
-- The finer-grained date parts below are not filled in yet.
45 , _hd_publication_month = Nothing -- TODO
46 , _hd_publication_day = Nothing -- TODO
47 , _hd_publication_hour = Nothing -- TODO
48 , _hd_publication_minute = Nothing -- TODO
49 , _hd_publication_second = Nothing -- TODO
50 , _hd_language_iso2 = language }
-- Read the 'first_page' field of an 'OA.Biblio' as an 'Int'.
-- The result is 'Nothing' when the field is absent or when its text cannot
-- be read as an 'Int'.
firstPage :: OA.Biblio -> Maybe Int
-- Monadic bind on 'Maybe' replaces the hand-rolled @maybe Nothing f (g <$> m)@.
firstPage OA.Biblio { first_page } = first_page >>= readMaybe . T.unpack
-- Join the available author display names with ", ". Authorships whose
-- author has no display name are dropped by 'catMaybes'.
-- NOTE(review): @catMaybes (f <$> xs)@ could be written @mapMaybe f xs@.
55 authors :: [OA.Authorship] -> Maybe Text
57 authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus)
-- Display name of the author attached to an authorship, when there is one.
59 getDisplayName :: OA.Authorship -> Maybe Text
60 getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
-- Render the institutions of a list of authorships as one 'Text'.
-- An empty list gives 'Nothing'. Otherwise each authorship contributes one
-- entry and the entries are joined with ", ".
institutes :: [OA.Authorship] -> Maybe Text
institutes authorshipList = case authorshipList of
  [] -> Nothing
  _  -> Just (T.intercalate ", " (renderAuthorship <$> authorshipList))
  where
    -- Within one entry every ", " is turned into " - ", so that ", " only
    -- ever separates entries in the final text.
    renderAuthorship = T.replace ", " " - " . institutionNames
    -- Display names of the institutions of one authorship, joined with ", ".
    institutionNames OA.Authorship { institutions } =
      T.intercalate ", " (institutionName <$> institutions)
    institutionName :: OA.DehydratedInstitution -> Text
    institutionName OA.DehydratedInstitution { display_name = name } = name
-- Display name of the source attached to 'primary_location'.
-- The result is 'Nothing' when either the location or its source is missing.
-- 'primary_location' is not defined in this block; it is presumably bound by
-- the enclosing definition - confirm.
-- Monadic bind on 'Maybe' replaces the hand-rolled @maybe Nothing f m@.
source = primary_location >>= locationSourceName
  where
    -- Display name of a location's source, if the location has one.
    locationSourceName OA.Location { source = src } = sourceName <$> src
    sourceName OA.DehydratedSource { display_name = name } = name