]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/List/CSV.hs
WIP Gargantext/Text/Terms/WithList
[gargantext.git] / src / Gargantext / Text / List / CSV.hs
1 {-|
2 Module : Gargantext.Text.List.CSV
3 Description :
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 CSV parser and writer for Gargantext list files (columns: status, label, forms).
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE DeriveGeneric #-}
17
18 module Gargantext.Text.List.CSV where
19
20 import GHC.IO (FilePath)
21
22 import Control.Applicative
23 import Control.Monad (mzero)
24
25 import Data.Char (ord)
26 import Data.Csv
27 import Data.Either (Either(Left, Right))
28 import Data.Text (Text, pack)
29 import qualified Data.ByteString.Lazy as BL
30
31 import Data.Vector (Vector)
32 import qualified Data.Vector as V
33
34 import Gargantext.Prelude hiding (length)
35 -- import Gargantext.Text.List.Types
36 ------------------------------------------------------------------------
37
38 --csv2lists :: Vector CsvList -> Lists
39 --csv2lists v = V.foldl' (\e (CsvList listType label forms) -> insertLists listType label forms e) emptyLists v
40
41 ------------------------------------------------------------------------
-- | Status that a row of a CSV list file can carry.
--
-- The textual encoding of each constructor is fixed by the 'FromField' and
-- 'ToField' instances of this type.
data CsvListType
  = CsvMap
  | CsvStop
  | CsvCandidate
  deriving (Eq, Read, Show)
44 ------------------------------------------------------------------------
45 -- CSV List Main Configuration
-- | Character that separates the columns of a list file: a horizontal tab.
--
-- Both 'csvDecodeOptions' and 'csvEncodeOptions' are built from this value.
csvListFieldDelimiter :: Char
csvListFieldDelimiter = '\t'
48
-- | The delimiter string @|&|@.
--
-- NOTE(review): nothing in the visible part of this module uses it;
-- presumably it separates the individual forms packed into the @forms@
-- column -- confirm against the callers.
csvListFormsDelimiter :: Text
csvListFormsDelimiter = "|&|"
51 ------------------------------------------------------------------------
-- | One row of a list file.
--
-- Every field is strict. Rows are read from and written to the columns
-- @status@, @label@ and @forms@ by the 'FromNamedRecord' and
-- 'ToNamedRecord' instances of this type.
data CsvList = CsvList
  { csvList_status :: !CsvListType -- ^ value of the @status@ column
  , csvList_label  :: !Text        -- ^ value of the @label@ column
  , csvList_forms  :: !Text        -- ^ raw, unsplit value of the @forms@ column
  }
  deriving (Show)
58 ------------------------------------------------------------------------
-- | Decode a row from a named record: each field is looked up by column
-- name (@status@, @label@, @forms@) with '.:' and the three results are
-- combined into a 'CsvList'.
instance FromNamedRecord CsvList where
  parseNamedRecord record =
    liftA3 CsvList
      (record .: "status")
      (record .: "label")
      (record .: "forms")
63
-- | Encode a row as a named record with the columns @status@, @label@ and
-- @forms@.
instance ToNamedRecord CsvList where
  toNamedRecord (CsvList status label forms) = namedRecord columns
    where
      -- Column name paired with the encoded value of the matching field.
      columns =
        [ "status" .= status
        , "label"  .= label
        , "forms"  .= forms
        ]
70 ------------------------------------------------------------------------
-- | Read a list status from its textual field: @map@, @main@ or @stop@.
--
-- Note that @main@ is the spelling of 'CsvCandidate'. Any other field is
-- rejected with 'mzero'.
instance FromField CsvListType where
  parseField raw = case raw of
    "map"  -> pure CsvMap
    "main" -> pure CsvCandidate
    "stop" -> pure CsvStop
    _      -> mzero
76
-- | Write a list status as its textual field; the exact inverse of the
-- 'FromField' instance (@map@, @main@ for 'CsvCandidate', @stop@).
instance ToField CsvListType where
  toField status = case status of
    CsvMap       -> "map"
    CsvCandidate -> "main"
    CsvStop      -> "stop"
81 ------------------------------------------------------------------------
-- | Decoding options for list files: the library defaults, except that the
-- field delimiter is 'csvListFieldDelimiter'.
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = defaultDecodeOptions { decDelimiter = delimiter }
  where
    -- The 'Char' is turned into its code point and then converted to the
    -- numeric type expected by 'decDelimiter'.
    delimiter = fromIntegral (ord csvListFieldDelimiter)
86
-- | Encoding options for list files: the library defaults, except that the
-- field delimiter is 'csvListFieldDelimiter'.
csvEncodeOptions :: EncodeOptions
csvEncodeOptions = defaultEncodeOptions { encDelimiter = delimiter }
  where
    -- The 'Char' is turned into its code point and then converted to the
    -- numeric type expected by 'encDelimiter'.
    delimiter = fromIntegral (ord csvListFieldDelimiter)
91 ------------------------------------------------------------------------
-- | Read a list file and decode it by column name using 'csvDecodeOptions'.
--
-- Returns the header together with the decoded rows. If decoding fails, the
-- action calls 'panic' with the decoder's error message.
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
fromCsvListFile fp = do
  contents <- BL.readFile fp
  unwrap (decodeByNameWith csvDecodeOptions contents)
  where
    -- Stay inside 'IO' so that a decoding failure is raised when the action
    -- runs, not later when the result happens to be inspected.
    unwrap (Left err)     = panic (pack err)
    unwrap (Right parsed) = pure parsed
98 ------------------------------------------------------------------------
-- | Write a header and its rows to a list file, encoded by column name
-- using 'csvEncodeOptions'.
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
toCsvListFile fp (header, rows) = BL.writeFile fp encoded
  where
    -- The encoder takes a list, so the vector of rows is converted first.
    encoded = encodeByNameWith csvEncodeOptions header (V.toList rows)
102 ------------------------------------------------------------------------