]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
[FIX] Ngrams in list
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / Json2Csv.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 JSON parser to export toward the CSV GargV3 format.
11 (Export from the Patent Database.)
12
13 -}
14
15 {-# LANGUAGE TemplateHaskell #-}
16
17 module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
18 where
19
import Prelude (read)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack)
import Data.Vector (fromList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import System.IO (FilePath)
import qualified Text.Read as TextRead (readMaybe)
30
-- | One patent record, as read from the JSON export.
-- Every field is kept as raw 'Text'; no field is parsed or validated here.
data Patent = Patent
  { _patent_title    :: Text  -- ^ title text
  , _patent_abstract :: Text  -- ^ abstract text
  , _patent_year     :: Text  -- ^ year, still textual at this stage
  , _patent_id       :: Text  -- ^ identifier text
  }
  deriving (Show)
36
-- JSON instances for 'Patent', generated by Template Haskell; the options
-- come from @unPrefix "_patent_"@.
deriveJSON (unPrefix "_patent_") ''Patent
38
-- | Read the file at the given path and decode its contents as a JSON list
-- of 'Patent's.  The result is 'Nothing' when 'decode' fails.
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents path = fmap decode (readFile path)
41
-- | Path of the input file (the JSON file that 'json2csv' reads).
type FilePathIn = FilePath

-- | Path of the output file (the CSV file that 'json2csv' writes).
type FilePathOut = FilePath
44
-- | Convert a JSON file of patents into a CSV file.
--
-- The input is read with 'readPatents'; if it cannot be decoded the
-- function panics with @"json2csv error"@.  Each patent is turned into a
-- 'CsvDoc' and the result is written with the 'headerCsvGargV3' header.
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
  decoded <- readPatents fin
  -- Lazy bindings: the panic only fires when the patents are demanded.
  let patents = maybe (panic "json2csv error") identity decoded
      docs    = fromList (map patent2csvDoc patents)
  writeFile fout (headerCsvGargV3, docs)
49
-- | Convert one 'Patent' into a 'CsvDoc' row.
--
-- Title and abstract are copied over unchanged.  Source and authors are the
-- fixed placeholder strings @"Source"@ and @"Authors"@, and month and day are
-- always 1.
--
-- The year is parsed from the patent's textual year.  If that text is not a
-- valid number the year becomes 'Nothing', so a single malformed record no
-- longer aborts the whole export the way the partial 'read' did.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent { _patent_title    = title
                      , _patent_abstract = abstract
                      , _patent_year     = year
                      }) =
  CsvDoc { csv_title             = title
         , csv_source            = "Source"
           -- Total parse: unparsable input gives Nothing, not an exception.
         , csv_publication_year  = TextRead.readMaybe (unpack year)
         , csv_publication_month = Just 1
         , csv_publication_day   = Just 1
         , csv_abstract          = abstract
         , csv_authors           = "Authors"
         }
59
60
61
62
63