]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/API/Isidore.hs
[refactoring] add some default extensions to package.yaml
[gargantext.git] / src / Gargantext / Text / Corpus / API / Isidore.hs
1 {-|
2 Module : Gargantext.Text.Corpus.API.Isidore
3 Description : To query French Humanities publication database from its API
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Text.Corpus.API.Isidore where
15
16 import System.FilePath (FilePath())
17 import Data.Text (Text)
18 import Gargantext.Core (Lang(..))
19 import Gargantext.Database.Admin.Types.Node (HyperdataDocument(..))
20 import Gargantext.Prelude
21 import Isidore.Client
22 import Servant.Client
23 import qualified Data.Text as Text
24 import qualified Gargantext.Text.Corpus.Parsers.Date as Date
25 import qualified Isidore as Isidore
26 import Gargantext.Text.Corpus.Parsers.CSV (writeDocs2Csv)
27 import Gargantext.Text.Corpus.Parsers (cleanText)
28
29 -- | TODO work with the ServantErr
30 get :: Lang -> Maybe Isidore.Limit
31 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
32 -> IO [HyperdataDocument]
33 get la l q a = do
34 let
35 printErr (DecodeFailure e _) = panic e
36 printErr e = panic (cs $ show e)
37
38 toIsidoreDocs :: Reply -> [IsidoreDoc]
39 toIsidoreDocs (ReplyOnly r) = [r]
40 toIsidoreDocs (Replies rs) = rs
41
42 iDocs <- either printErr _content <$> Isidore.get l q a
43
44 hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
45 pure hDocs
46
47 isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
48 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
49 -> IO ()
50 isidore2csvFile fp la li tq aq = do
51 hdocs <- get la li tq aq
52 writeDocs2Csv fp hdocs
53
54 isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
55 isidoreToDoc l (IsidoreDoc t a d u s as) = do
56 let
57 author :: Author -> Text
58 author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
59 author (Authors aus) = Text.intercalate ". " $ map author aus
60
61 creator2text :: Creator -> Text
62 creator2text (Creator au) = author au
63 creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
64
65 langText :: LangText -> Text
66 langText (LangText _l t1) = t1
67 langText (OnlyText t2 ) = t2
68 langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
69
70 (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
71
72 pure $ HyperdataDocument (Just "Isidore")
73 Nothing
74 u
75 Nothing
76 Nothing
77 Nothing
78 (Just $ cleanText $ langText t)
79 (creator2text <$> as)
80 Nothing
81 (Just $ maybe "Nothing" identity $ _sourceName <$> s)
82 (cleanText <$> langText <$> a)
83 (fmap (Text.pack . show) utcTime)
84 (pub_year)
85 (pub_month)
86 (pub_day)
87 Nothing
88 Nothing
89 Nothing
90 (Just $ (Text.pack . show) l)
91
92