]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/API/Isidore.hs
optimisation for temporal matching
[gargantext.git] / src / Gargantext / Text / Corpus / API / Isidore.hs
1 {-|
2 Module : Gargantext.Text.Corpus.API.Isidore
3 Description : To query French Humanities publication database from its API
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13 {-# LANGUAGE ScopedTypeVariables #-}
14 {-# LANGUAGE OverloadedStrings #-}
15
16 module Gargantext.Text.Corpus.API.Isidore where
17
18 import System.FilePath (FilePath())
19 import Data.Text (Text)
20 import Gargantext.Core (Lang(..))
21 import Gargantext.Database.Types.Node (HyperdataDocument(..))
22 import Gargantext.Prelude
23 import Isidore.Client
24 import Servant.Client
25 import qualified Data.Text as Text
26 import qualified Gargantext.Text.Corpus.Parsers.Date as Date
27 import qualified Isidore as Isidore
28 import Gargantext.Text.Corpus.Parsers.CSV (writeDocs2Csv)
29 import Gargantext.Text.Corpus.Parsers (cleanText)
30
31 -- | TODO work with the ServantErr
32 get :: Lang -> Maybe Isidore.Limit
33 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
34 -> IO [HyperdataDocument]
35 get la l q a = do
36 let
37 printErr (DecodeFailure e _) = panic e
38 printErr e = panic (cs $ show e)
39
40 toIsidoreDocs :: Reply -> [IsidoreDoc]
41 toIsidoreDocs (ReplyOnly r) = [r]
42 toIsidoreDocs (Replies rs) = rs
43
44 iDocs <- either printErr _content <$> Isidore.get l q a
45
46 hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
47 pure hDocs
48
49 isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
50 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
51 -> IO ()
52 isidore2csvFile fp la li tq aq = do
53 hdocs <- get la li tq aq
54 writeDocs2Csv fp hdocs
55
56 isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
57 isidoreToDoc l (IsidoreDoc t a d u s as) = do
58 let
59 author :: Author -> Text
60 author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
61 author (Authors aus) = Text.intercalate ". " $ map author aus
62
63 creator2text :: Creator -> Text
64 creator2text (Creator au) = author au
65 creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
66
67 langText :: LangText -> Text
68 langText (LangText _l t1) = t1
69 langText (OnlyText t2 ) = t2
70 langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
71
72 (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
73
74 pure $ HyperdataDocument (Just "Isidore")
75 Nothing
76 u
77 Nothing
78 Nothing
79 Nothing
80 (Just $ cleanText $ langText t)
81 (creator2text <$> as)
82 Nothing
83 (Just $ maybe "Nothing" identity $ _sourceName <$> s)
84 (cleanText <$> langText <$> a)
85 (fmap (Text.pack . show) utcTime)
86 (pub_year)
87 (pub_month)
88 (pub_day)
89 Nothing
90 Nothing
91 Nothing
92 (Just $ (Text.pack . show) l)
93
94