]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Json2Csv.hs
Merge branch 'dev-corpus-with-filetype' into dev
[gargantext.git] / src / Gargantext / Text / Parsers / Json2Csv.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Json2Csv
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 JSON parser to export toward the CSV GargV3 format.
11 (Export from the Patent Database.)
12
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
17 {-# LANGUAGE DeriveGeneric #-}
18 {-# LANGUAGE TemplateHaskell #-}
19
20 module Gargantext.Text.Parsers.Json2Csv (json2csv, readPatents)
21 where
22
import Prelude (read)
import System.IO (FilePath)
import Text.Read (readMaybe)

import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, append, unpack)
import Data.Vector (fromList)

import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Gargantext.Text.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
33
-- | A single patent entry as decoded from the JSON input.
--
-- Every field is kept as raw 'Text'; no validation happens at this level.
data Patent = Patent
  { _patent_title    :: Text
    -- ^ Passed through as the first 'CsvDoc' argument by 'patent2csvDoc'.
  , _patent_abstract :: Text
    -- ^ Passed through to 'CsvDoc' by 'patent2csvDoc'.
  , _patent_year     :: Text
    -- ^ Year as text; 'patent2csvDoc' parses it into a number.
  , _patent_id       :: Text
    -- ^ Decoded but ignored by 'patent2csvDoc'.
  }
  deriving (Show)

-- JSON instances are generated by Template Haskell using the options built by
-- @unPrefix "_patent_"@ (presumably this drops the "_patent_" prefix from the
-- record field names to obtain the JSON keys -- confirm in
-- Gargantext.Core.Utils.Prefix).
deriveJSON (unPrefix "_patent_") ''Patent
41
-- | Read the file at the given path as a lazy 'ByteString' and decode it as
-- a JSON list of 'Patent's.
--
-- The result is 'Nothing' when 'decode' rejects the file contents.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents fp = decodePatents <$> readFile fp
  where
    -- Named only for readability; the target type comes from the signature.
    decodePatents contents = decode contents
44
-- | Path of the JSON file to read.
type FilePathIn = FilePath

-- | Path of the CSV file to write.
type FilePathOut = FilePath

-- | Convert a JSON file of patents into a CSV file.
--
-- The input is decoded with 'readPatents', each 'Patent' is turned into a
-- 'CsvDoc' by 'patent2csvDoc', and the rows are handed to 'writeFile'
-- together with 'headerCsvGargV3'.
--
-- Panics with @"json2csv error"@ when the input cannot be decoded.
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
  decoded <- readPatents fin
  -- Match on the decode result here so that a decode failure is raised
  -- before 'writeFile' is invoked, instead of being hidden in a lazy thunk
  -- that only blows up whenever the row vector happens to be forced.
  case decoded of
    Nothing      -> panic "json2csv error"
    Just patents ->
      writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
52
-- | Build a 'CsvDoc' from a 'Patent'.
--
-- The title and abstract are passed through unchanged, the year text is
-- parsed into a number, and the patent id is dropped. The remaining
-- positional 'CsvDoc' arguments are filled with the fixed values @"Source"@,
-- @1@, @1@ and @"Authors"@ (their meaning is defined by 'CsvDoc', which is
-- declared in Gargantext.Text.Parsers.CSV).
--
-- Panics with a message containing the offending text when the year cannot
-- be parsed.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) =
  CsvDoc title "Source" parsedYear 1 1 abstract "Authors"
  where
    -- 'readMaybe' instead of the partial 'read': a malformed year now fails
    -- with a diagnostic naming the bad value rather than
    -- "Prelude.read: no parse".
    parsedYear = case readMaybe (unpack year) of
      Just y  -> y
      Nothing -> panic (append "json2csv: cannot parse patent year: " year)
56
57
58
59
60