]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/CSV.hs
[CSV Parser] Parser for Gargantext (legacy) CSV files.
[gargantext.git] / src / Gargantext / Text / Parsers / CSV.hs
1 {-|
2 Module : Gargantext.Text.Parsers.CSV
3 Description : Parser for Gargantext (legacy) CSV files
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Reads and writes Gargantext (legacy) CSV files: tab-delimited files with a
11 header row, where each record is decoded to or encoded from a @CsvDoc@.
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE DeriveGeneric #-}
17
18 module Gargantext.Text.Parsers.CSV where
19
20 import GHC.Generics (Generic)
21 import GHC.IO (FilePath)
22 import Data.Either (Either(Left, Right))
23 import Data.Text (Text)
24 import Control.Applicative
25 import qualified Data.ByteString.Lazy as BL
26 import Data.Csv
27 import qualified Data.Vector as V
28 import Data.Text (pack)
29
30 import Data.Char (ord)
31 import Gargantext.Prelude
32
-- | One document record of a Gargantext (legacy) CSV file.
--
-- All fields are strict. The 'FromNamedRecord' and 'ToNamedRecord'
-- instances of this type map every field to the column of the same name.
data CsvDoc = CsvDoc
  { title             :: !Text  -- ^ Value of the @title@ column.
  , source            :: !Text  -- ^ Value of the @source@ column.
  , publication_year  :: !Int   -- ^ Value of the @publication_year@ column.
  , publication_month :: !Int   -- ^ Value of the @publication_month@ column.
  , publication_day   :: !Int   -- ^ Value of the @publication_day@ column.
  , abstract          :: !Text  -- ^ Value of the @abstract@ column.
  , authors           :: !Text  -- ^ Value of the @authors@ column.
  }
  deriving (Show, Generic)
43
-- | Decode a named record into a 'CsvDoc' by looking up each field under
-- the column that carries the same name. The lookups are performed in
-- declaration order and parsing fails as soon as one of them fails.
instance FromNamedRecord CsvDoc where
  parseNamedRecord row = do
    docTitle    <- row .: "title"
    docSource   <- row .: "source"
    docYear     <- row .: "publication_year"
    docMonth    <- row .: "publication_month"
    docDay      <- row .: "publication_day"
    docAbstract <- row .: "abstract"
    docAuthors  <- row .: "authors"
    pure (CsvDoc docTitle docSource docYear docMonth docDay docAbstract docAuthors)
52
-- | Encode a 'CsvDoc' as a named record with one entry per field, keyed
-- by the column name that matches the field.
instance ToNamedRecord CsvDoc where
  toNamedRecord (CsvDoc docTitle docSource year month day docAbstract docAuthors) =
    namedRecord
      [ ("title",             toField docTitle)
      , ("source",            toField docSource)
      , ("publication_year",  toField year)
      , ("publication_month", toField month)
      , ("publication_day",   toField day)
      , ("abstract",          toField docAbstract)
      , ("authors",           toField docAuthors)
      ]
63
64
-- | Decoding options: the library defaults, except that fields are
-- delimited by a tab character.
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = defaultDecodeOptions { decDelimiter = tab }
  where
    -- The delimiter is stored as a byte, hence the conversion from 'Char'.
    tab = fromIntegral (ord '\t')
67
-- | Encoding options: the library defaults, except that fields are
-- delimited by a tab character.
csvEncodeOptions :: EncodeOptions
csvEncodeOptions = defaultEncodeOptions { encDelimiter = tab }
  where
    -- The delimiter is stored as a byte, hence the conversion from 'Char'.
    tab = fromIntegral (ord '\t')
70
71
-- | Read the file at the given path and decode it by column name using
-- 'csvDecodeOptions'.
--
-- Returns the header together with the decoded documents. If decoding
-- fails, the decoder's error message is passed to 'panic'.
readCsv :: FilePath -> IO (Header, V.Vector CsvDoc)
readCsv fp = do
  contents <- BL.readFile fp
  unwrap (decodeByNameWith csvDecodeOptions contents)
  where
    -- Inspect the decoding result as part of the IO action itself, so a
    -- failure is raised when the action runs rather than when the result
    -- is later forced.
    unwrap (Left err)     = panic (pack err)
    unwrap (Right parsed) = pure parsed
78
-- | Encode the documents by column name using 'csvEncodeOptions' and the
-- supplied header, then write the result to the file at the given path.
writeCsv :: FilePath -> (Header, V.Vector CsvDoc) -> IO ()
writeCsv fp (hdr, docs) = BL.writeFile fp encoded
  where
    -- The encoder takes a list, so the vector is converted first.
    encoded = encodeByNameWith csvEncodeOptions hdr (V.toList docs)
82