Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
Merge branch 'dev' into dev-openalex
[gargantext.git] / src / Gargantext / Core / Text / Corpus / API / OpenAlex.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.API.OpenAlex
3 Description : OpenAlex API connection
4 Copyright : (c) CNRS, 2023
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10 module Gargantext.Core.Text.Corpus.API.OpenAlex where
11
12 import Conduit
13 import qualified Data.Text as T
14 import Gargantext.Core.Text.Corpus.Query as Corpus
15 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
16 import Protolude
17 import qualified OpenAlex as OA
18 import qualified OpenAlex.Types as OA
19 import Servant.Client (ClientError)
20
21
-- | Query OpenAlex for works matching a raw query and expose them as a
-- stream of 'HyperdataDocument's.
--
-- * The first argument (a contact e-mail) is accepted but currently unused.
-- * The second argument is the raw query; its text is handed to
--   'OA.fetchWorksC' as the search term.
-- * The third argument caps how many documents the returned conduit yields.
--   When it is 'Nothing' the stream is capped at 1000 documents.
--
-- On success the result pairs the count reported by 'OA.fetchWorksC'
-- (passed through unchanged, i.e. /not/ clamped to the limit) with the
-- capped document stream. A failed request is returned as 'Left'.
get :: Text
    -> Corpus.RawQuery
    -> Maybe Limit
    -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get _email q mLimit = do
  eRes <- OA.fetchWorksC Nothing $ Just $ Corpus.getRawQuery q
  pure $ (\(len, docsC) -> (len, docsC .| takeC maxDocs .| mapC toDoc)) <$> eRes
  where
    -- Honour the caller-supplied limit instead of always truncating at a
    -- fixed size.
    -- NOTE(review): assumes 'Limit' exposes @getLimit :: Limit -> Int@ from
    -- "Gargantext.Core.Text.Corpus.Query" — confirm against that module.
    maxDocs :: Int
    maxDocs = maybe defaultMaxDocs Corpus.getLimit mLimit

    -- Fallback cap used when the caller gives no limit.
    defaultMaxDocs :: Int
    defaultMaxDocs = 1000
29
-- | Convert an OpenAlex 'OA.Work' into a 'HyperdataDocument'.
--
-- The work's @id@ is used for both '_hd_uniqId' and '_hd_uniqIdBdd', the
-- publication date is rendered with 'show', and the finer-grained date
-- fields (month, day, hour, minute, second) are not populated yet.
toDoc :: OA.Work -> HyperdataDocument
toDoc (OA.Work { .. } ) =
  HyperdataDocument { _hd_bdd = Just "OpenAlex"
                    , _hd_doi = doi
                    , _hd_url = url
                    , _hd_uniqId = Just id
                    , _hd_uniqIdBdd = Just id
                    , _hd_page = firstPage biblio
                    , _hd_title = title
                    , _hd_authors = authors authorships
                    , _hd_institutes = institutes authorships
                    , _hd_source = source
                    , _hd_abstract = Just abstract_reconstructed
                    , _hd_publication_date = Just $ show publication_date
                    , _hd_publication_year = Just publication_year
                    , _hd_publication_month = Nothing  -- TODO
                    , _hd_publication_day = Nothing  -- TODO
                    , _hd_publication_hour = Nothing  -- TODO
                    , _hd_publication_minute = Nothing  -- TODO
                    , _hd_publication_second = Nothing  -- TODO
                    , _hd_language_iso2 = language }
  where
    -- First page of the work as a number; 'Nothing' when the field is
    -- absent or its text does not read as an 'Int'.
    firstPage :: OA.Biblio -> Maybe Int
    firstPage OA.Biblio { first_page } = first_page >>= readMaybe . T.unpack

    -- Display names of all authors joined with ", "; 'Nothing' when the
    -- work has no authorships.
    authors :: [OA.Authorship] -> Maybe Text
    authors [] = Nothing
    authors aus = Just $ T.intercalate ", " (getDisplayName <$> aus)
      where
        getDisplayName :: OA.Authorship -> Text
        getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn

    -- Institution names, one segment per authorship, segments joined with
    -- ", ". Inside a segment every ", " (both the joiner between
    -- institutions and any occurring in a name) is rewritten to " - ", so
    -- that ", " only ever separates authorships.
    -- NOTE(review): an authorship without institutions contributes an empty
    -- segment, which can yield output such as "A, , B" — confirm whether
    -- that is intended.
    institutes :: [OA.Authorship] -> Maybe Text
    institutes [] = Nothing
    institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus)
      where
        getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
        getDisplayName :: OA.DehydratedInstitution -> Text
        getDisplayName OA.DehydratedInstitution { display_name = dn } = dn

    -- Display name of the source attached to the primary location;
    -- 'Nothing' when either the location or its source is missing.
    source :: Maybe Text
    source = primary_location >>= getSource
      where
        getSource OA.Location { source = s } = getSourceDisplayName <$> s
        getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn