2 Module : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Json parser to export toward the CSV GargV3 format.
11 (Export from the Patent Database.)
15 {-# LANGUAGE TemplateHaskell #-}
17 module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack)
import Data.Vector (fromList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import System.IO (FilePath)
import qualified Text.Read as TR (readMaybe)
31 data Patent = Patent { _patent_title :: Text
32 , _patent_abstract :: Text
33 , _patent_year :: Text
-- Generate the JSON instances for 'Patent' at compile time; the field-name
-- handling is delegated to @unPrefix "_patent_"@.
deriveJSON (unPrefix "_patent_") ''Patent
-- | Read the file at the given path and try to decode its contents as a list
-- of 'Patent's. The result is 'Nothing' when decoding fails.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents fp = do
  rawJson <- readFile fp
  pure (decode rawJson)
-- | Path of the JSON file that 'json2csv' reads.
type FilePathIn = FilePath

-- | Path of the CSV file that 'json2csv' writes.
type FilePathOut = FilePath

-- | Convert a JSON patent export into a CSV file with the 'headerCsvGargV3'
-- header.
--
-- The input is decoded with 'readPatents'; each decoded 'Patent' is mapped
-- through 'patent2csvDoc' and the resulting rows are passed to 'writeFile'.
-- Panics with @"json2csv error"@ when the input cannot be decoded.
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
  decoded <- readPatents fin
  -- Inspect the decode result before calling 'writeFile', so that a decode
  -- failure is raised here instead of from a lazily evaluated value while the
  -- output is being produced.
  case decoded of
    Nothing      -> panic "json2csv error"
    Just patents ->
      writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
-- | Build a 'CsvDoc' row from a 'Patent'.
--
-- Title and abstract are copied over unchanged. Source and authors are the
-- fixed placeholders @"Source"@ and @"Authors"@, and month and day are always
-- set to 1. The year is parsed from the textual '_patent_year'; a value that
-- does not parse yields 'Nothing' instead of a runtime error.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent { _patent_title    = title
                      , _patent_abstract = abstract
                      , _patent_year     = year }) =
  CsvDoc { csv_title             = title
         , csv_source            = "Source"
           -- 'TR.readMaybe' is total, unlike 'read': a malformed year becomes
           -- 'Nothing' rather than an exception thrown when the row is used.
         , csv_publication_year  = TR.readMaybe (unpack year)
         , csv_publication_month = Just 1
         , csv_publication_day   = Just 1
         , csv_abstract          = abstract
         , csv_authors           = "Authors" }