2 Module : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 JSON parser used to export data to the CSV GargV3 format.
11 (Export from the Patent Database.)
15 {-# LANGUAGE TemplateHaskell #-}
17 module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack)
import Data.Vector (fromList)
import System.IO (FilePath)
import qualified Text.Read as TR (readMaybe)

import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix)
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
-- | A patent entry as decoded from the JSON input; the fields visible here
-- are all kept as raw 'Text'.
32 data Patent = Patent { _patent_title :: Text -- ^ Title, copied to the CSV title column.
33 , _patent_abstract :: Text -- ^ Abstract, copied to the CSV abstract column.
34 , _patent_year :: Text -- ^ Year as text; converted to a number when the CSV row is built.
-- Template Haskell splice deriving the JSON instances of 'Patent'.
-- NOTE(review): 'unPrefix' is given the common "_patent_" field prefix,
-- presumably so the JSON keys omit it — confirm in Gargantext.Core.Utils.Prefix.
38 $(deriveJSON (unPrefix "_patent_") ''Patent)
-- | Read the file at the given path and decode its contents as a JSON list
-- of 'Patent's. The result is 'Nothing' when the contents do not decode.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents fp = do
  contents <- readFile fp
  pure (decode contents)
-- | Path of the JSON file to read (first argument of 'json2csv').
43 type FilePathIn = FilePath
-- | Path of the CSV file to write (second argument of 'json2csv').
44 type FilePathOut = FilePath
-- | Convert a JSON file of patents into a CSV file in the GargV3 format.
--
-- The input is decoded with 'readPatents' and every patent is turned into a
-- CSV row by 'patent2csvDoc'. When the input cannot be decoded the function
-- panics with @"json2csv error"@; this happens before 'writeFile' is called,
-- so the output path is left untouched in that case.
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
  decoded <- readPatents fin
  case decoded of
    -- Match on the result here rather than hiding the 'panic' in a lazy
    -- thunk that would only be forced while the CSV is being written.
    Nothing      -> panic "json2csv error"
    Just patents ->
      writeFile fout (headerCsvGargV3, fromList (map patent2csvDoc patents))
-- | Build one GargV3 CSV row from a 'Patent'.
--
-- Title and abstract are copied unchanged; source and authors are the fixed
-- placeholders @"Source"@ and @"Authors"@; month and day come from
-- "Gargantext.Defaults". The year is parsed from the patent's textual year:
-- a value that does not parse yields 'Nothing' rather than aborting the
-- whole export, which is what the partial 'read' did.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc Patent { _patent_title    = title
                     , _patent_abstract = abstract
                     , _patent_year     = year
                     } =
  CsvDoc
    { csv_title             = title
    , csv_source            = "Source"
    , csv_publication_year  = TR.readMaybe (unpack year)  -- total parse, no crash on bad input
    , csv_publication_month = Just Defaults.month
    , csv_publication_day   = Just Defaults.day
    , csv_abstract          = abstract
    , csv_authors           = "Authors"
    }