]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
[istex] scroll API fetch, first draft
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / Json2Csv.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Json parser to export toward CSV GargV3 format.
11 (Export from the Patent Database.)
12
13 -}
14
15 {-# LANGUAGE TemplateHaskell #-}
16
17 module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
18 where
19
import Prelude (read)
import System.IO (FilePath)
import qualified Text.Read as TR (readMaybe)

import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack)
import Data.Vector (fromList)

import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix)
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
31
-- | One patent record as decoded from the input JSON.
--
-- Every field is kept as raw 'Text'; the year is only converted to a number
-- later, in 'patent2csvDoc'.
data Patent = Patent
  { _patent_title    :: Text  -- ^ title of the patent
  , _patent_abstract :: Text  -- ^ abstract of the patent
  , _patent_year     :: Text  -- ^ publication year, still textual here
  , _patent_id       :: Text  -- ^ identifier of the patent
  }
  deriving (Show)

-- Generate the 'ToJSON' and 'FromJSON' instances with Template Haskell.
-- NOTE(review): 'unPrefix' presumably drops the "_patent_" prefix from the
-- field names to obtain the JSON keys -- confirm in
-- "Gargantext.Core.Utils.Prefix".
$(deriveJSON (unPrefix "_patent_") ''Patent)
39
-- | Read the file at the given path and try to decode its contents as a JSON
-- list of 'Patent's.
--
-- The result is 'Nothing' when 'decode' rejects the file contents.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents path = decode <$> rawJson
  where
    -- Lazy 'ByteString' contents of the input file.
    rawJson = readFile path
42
-- | Path of the JSON file that is read.
type FilePathIn = FilePath

-- | Path of the CSV file that is written.
type FilePathOut = FilePath

-- | Convert a JSON file of patents into a CSV file.
--
-- The patents are loaded with 'readPatents', each one is turned into a
-- 'CsvDoc' by 'patent2csvDoc', and the resulting vector is handed to
-- 'writeFile' together with 'headerCsvGargV3'.
--
-- Panics with @"json2csv error"@ when the input cannot be decoded.
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
  decoded <- readPatents fin
  let patents = case decoded of
        Just ps -> ps
        Nothing -> panic "json2csv error"
      docs = fromList (map patent2csvDoc patents)
  writeFile fout (headerCsvGargV3, docs)
50
-- | Turn one 'Patent' into a 'CsvDoc' row.
--
-- Title and abstract are copied over unchanged.  Source and authors are
-- filled with the fixed placeholders @"Source"@ and @"Authors"@; month and
-- day are taken from "Gargantext.Defaults".  The patent identifier is not
-- carried over into the CSV row.
--
-- The year is parsed with the total 'TR.readMaybe': a year that does not
-- parse yields 'Nothing' rather than aborting the whole export with a
-- runtime error.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc Patent { _patent_title    = title
                     , _patent_abstract = abstract
                     , _patent_year     = year
                     } =
  CsvDoc { csv_title             = title
         , csv_source            = "Source"
         , csv_publication_year  = TR.readMaybe (unpack year)
         , csv_publication_month = Just Defaults.month
         , csv_publication_day   = Just Defaults.day
         , csv_abstract          = abstract
         , csv_authors           = "Authors"
         }
60
61
62
63
64