2 Module : Gargantext.Text.Corpus.API.Isidore
3 Description : Query the Isidore French humanities publication database through its API
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE ScopedTypeVariables #-}
14 module Gargantext.Text.Corpus.API.Isidore where
16 import System.FilePath (FilePath())
17 import Data.Text (Text)
18 import Gargantext.Core (Lang(..))
19 import Gargantext.Database.Admin.Types.Node (HyperdataDocument(..))
20 import Gargantext.Prelude
23 import qualified Data.Text as Text
24 import qualified Gargantext.Text.Corpus.Parsers.Date as Date
25 import qualified Isidore as Isidore
26 import Gargantext.Text.Corpus.Parsers.CSV (writeDocs2Csv)
27 import Gargantext.Text.Corpus.Parsers (cleanText)
29 -- | TODO work with the ServantErr
--
-- Query Isidore through 'Isidore.get' and convert every returned
-- 'IsidoreDoc' into a 'HyperdataDocument' with 'isidoreToDoc'.
--
-- Errors returned by 'Isidore.get' are not propagated: they end in 'panic'
-- (see @printErr@ below).
--
-- NOTE(review): the equation head and the final result line are not visible
-- in this excerpt; presumably @la@, @l@, @q@ and @a@ are the four parameters
-- in signature order -- confirm against the full file.
30 get :: Lang -> Maybe Isidore.Limit
31 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
32 -> IO [HyperdataDocument]
-- Handler for the 'Left' case of the query result: a 'DecodeFailure' panics
-- with its first field, any other error panics with its 'show' rendering.
35 printErr (DecodeFailure e _) = panic e
36 printErr e = panic (cs $ show e)
-- A 'Reply' holds either one document or a list of them; normalise to a list.
38 toIsidoreDocs :: Reply -> [IsidoreDoc]
39 toIsidoreDocs (ReplyOnly r) = [r]
40 toIsidoreDocs (Replies rs) = rs
-- Run the query; on success keep only the @_content@ of the result.
42 iDocs <- either printErr _content <$> Isidore.get l q a
-- Convert each document; 'isidoreToDoc' is an 'IO' action, hence 'mapM'.
44 hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
-- | Run 'get' with the given language, limit, text query and author query,
-- then hand the resulting documents and the file path to 'writeDocs2Csv'.
--
-- NOTE(review): the result-type line of the signature is not visible in this
-- excerpt -- confirm against the full file.
47 isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
48 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
50 isidore2csvFile fp la li tq aq = do
-- Errors inside 'get' end in 'panic', so nothing is written in that case.
51 hdocs <- get la li tq aq
52 writeDocs2Csv fp hdocs
-- | Convert one 'IsidoreDoc' into a 'HyperdataDocument'.
--
-- Runs in 'IO' because the date is split with 'Date.dateSplit'.
--
-- NOTE(review): several argument lines of the 'HyperdataDocument' constructor
-- call are not visible in this excerpt, so only the visible arguments are
-- described below; which record field each one fills must be checked against
-- the 'HyperdataDocument' definition.
54 isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
55 isidoreToDoc l (IsidoreDoc t a d u s as) = do
-- Render a single 'Author' as the '_name' of its two fields separated by
-- ", "; a group of authors is rendered recursively and joined with ". ".
57 author :: Author -> Text
58 author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
59 author (Authors aus) = Text.intercalate ". " $ map author aus
-- Render a 'Creator' through 'author'; several are joined with ". ".
61 creator2text :: Creator -> Text
62 creator2text (Creator au) = author au
63 creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
-- Extract the text of a 'LangText', ignoring the first field of the
-- 'LangText' constructor; an 'ArrayText' is flattened and joined with spaces.
65 langText :: LangText -> Text
66 langText (LangText _l t1) = t1
67 langText (OnlyText t2 ) = t2
68 langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
-- Split the date @d@; when @d@ is 'Nothing' the literal "2019" is used instead.
70 (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
-- Build the document; the first argument is the literal "Isidore".
72 pure $ HyperdataDocument (Just "Isidore")
-- Built from @t@: 'langText' then 'cleanText' (presumably the title -- confirm).
78 (Just $ cleanText $ langText t)
-- Built from @s@: its '_sourceName', or the literal text "Nothing" when @s@ is absent.
81 (Just $ maybe "Nothing" identity $ _sourceName <$> s)
-- Built from @a@ when present: 'langText' then 'cleanText'.
82 (cleanText <$> langText <$> a)
-- 'show' rendering of the @utcTime@ returned by 'Date.dateSplit', when present.
83 (fmap (Text.pack . show) utcTime)
-- 'show' rendering of the 'Lang' argument @l@.
90 (Just $ (Text.pack . show) l)