]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/IsidoreApi.hs
[REFACTO] ngrams unsupervised.
[gargantext.git] / src / Gargantext / Text / Parsers / IsidoreApi.hs
1 {-|
2 Module : Gargantext.Text.Parsers.IsidoreApi
3 Description : To query French Humanities publication database from its API
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE ScopedTypeVariables #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Parsers.IsidoreApi where
18
19 import System.FilePath (FilePath())
20 import Data.Text (Text)
21 import Gargantext.Core (Lang(..))
22 import Gargantext.Database.Types.Node (HyperdataDocument(..))
23 import Gargantext.Prelude
24 import Isidore.Client
25 import Servant.Client
26 import qualified Data.Text as Text
27 import qualified Gargantext.Text.Parsers.Date as Date
28 import qualified Isidore as Isidore
29 import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
30 import Gargantext.Text.Parsers (cleanText)
31
32 -- | TODO work with the ServantErr
33 get :: Lang -> Maybe Isidore.Limit
34 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
35 -> IO [HyperdataDocument]
36 get la l q a = do
37 let
38 printErr (DecodeFailure e _) = panic e
39 printErr e = panic (cs $ show e)
40
41 toIsidoreDocs :: Reply -> [IsidoreDoc]
42 toIsidoreDocs (ReplyOnly r) = [r]
43 toIsidoreDocs (Replies rs) = rs
44
45 iDocs <- either printErr _content <$> Isidore.get l q a
46
47 hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
48 pure hDocs
49
50 isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
51 -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
52 -> IO ()
53 isidore2csvFile fp la li tq aq = do
54 hdocs <- get la li tq aq
55 writeDocs2Csv fp hdocs
56
57 isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
58 isidoreToDoc l (IsidoreDoc t a d u s as) = do
59 let
60 author :: Author -> Text
61 author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
62 author (Authors aus) = Text.intercalate ". " $ map author aus
63
64 creator2text :: Creator -> Text
65 creator2text (Creator au) = author au
66 creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
67
68 langText :: LangText -> Text
69 langText (LangText _l t1) = t1
70 langText (OnlyText t2 ) = t2
71 langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
72
73 (utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
74
75 pure $ HyperdataDocument (Just "IsidoreApi")
76 Nothing
77 u
78 Nothing
79 Nothing
80 Nothing
81 (Just $ cleanText $ langText t)
82 Nothing
83 (creator2text <$> as)
84 (_sourceName <$> s)
85 (cleanText <$> langText <$> a)
86 (fmap (Text.pack . show) utcTime)
87 (pub_year)
88 (pub_month)
89 (pub_day)
90 Nothing
91 Nothing
92 Nothing
93 (Just $ (Text.pack . show) l)
94
95