Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/List/CSV.hs
[SPECS] index a corpus with term list.
[gargantext.git] / src / Gargantext / Text / List / CSV.hs
1 {-|
2 Module : Gargantext.Text.List.CSV
3 Description :
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 CSV reader and writer for Gargantext term-list files.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE DeriveGeneric #-}
17
18 module Gargantext.Text.List.CSV where
19
20 import GHC.Real (round)
21 import GHC.IO (FilePath)
22
23 import Control.Applicative
24 import Control.Monad (mzero)
25
26 import Data.Char (ord)
27 import Data.Csv
28 import Data.Either (Either(Left, Right))
29 import Data.Text (Text, pack, length, intercalate)
30 import qualified Data.ByteString.Lazy as BL
31
32 import Data.Vector (Vector)
33 import qualified Data.Vector as V
34
35 import Gargantext.Prelude hiding (length)
36 import Gargantext.Text.List.Types
37 ------------------------------------------------------------------------
38
39 --csv2lists :: Vector CsvList -> Lists
40 --csv2lists v = V.foldl' (\e (CsvList listType label forms) -> insertLists lt label forms e) emptyLists v
41
42 ------------------------------------------------------------------------
-- | Status tag carried by each row of a CSV term list.
--
-- The textual values used in the file are defined by the 'FromField' and
-- 'ToField' instances of this type, not by the derived 'Show' / 'Read'.
data CsvListType
  = CsvMap        -- ^ written as @map@ in the file
  | CsvStop       -- ^ written as @stop@ in the file
  | CsvCandidate  -- ^ written as @main@ in the file
  deriving (Eq, Read, Show)
45 ------------------------------------------------------------------------
46 -- CSV List Main Configuration
-- | Character separating the columns of a term-list file (a tab).
--
-- Both 'csvDecodeOptions' and 'csvEncodeOptions' are derived from it, so
-- reading and writing always agree on the delimiter.
csvListFieldDelimiter :: Char
csvListFieldDelimiter = '\t'
49
-- | Separator string associated with the @forms@ column.
--
-- NOTE(review): nothing in this module splits or joins 'csvList_forms' with
-- it; presumably callers do -- to be confirmed against the call sites.
csvListFormsDelimiter :: Text
csvListFormsDelimiter = "|&|"
52 ------------------------------------------------------------------------
-- | One row of a CSV term list.
--
-- All fields are strict. The field/column correspondence is given by the
-- 'FromNamedRecord' and 'ToNamedRecord' instances.
data CsvList = CsvList
  { csvList_status :: !CsvListType  -- ^ value of the @status@ column
  , csvList_label  :: !Text         -- ^ value of the @label@ column
  , csvList_forms  :: !Text         -- ^ value of the @forms@ column, kept as one raw 'Text'
  }
  deriving (Show)
59 ------------------------------------------------------------------------
-- | Decode a row by looking up the @status@, @label@ and @forms@ columns
-- by name; the parse fails if any of the three lookups fails.
instance FromNamedRecord CsvList where
  parseNamedRecord row =
    liftA3 CsvList
      (row .: "status")
      (row .: "label")
      (row .: "forms")
64
-- | Encode a row under the same column names that 'parseNamedRecord' reads:
-- @status@, @label@ and @forms@.
instance ToNamedRecord CsvList where
  toNamedRecord (CsvList status label forms) = namedRecord columns
    where
      columns =
        [ "status" .= status
        , "label"  .= label
        , "forms"  .= forms
        ]
71 ------------------------------------------------------------------------
-- | Parse the @status@ cell. Only the exact values @map@, @main@ and @stop@
-- are accepted; any other content makes the field parser fail ('mzero').
instance FromField CsvListType where
  parseField field = case field of
    "map"  -> pure CsvMap
    "main" -> pure CsvCandidate  -- candidates are spelled "main" in the file
    "stop" -> pure CsvStop
    _      -> mzero
77
-- | Render a status with the same spellings the 'FromField' instance
-- accepts, so an encoded value can be decoded again.
instance ToField CsvListType where
  toField status = case status of
    CsvMap       -> "map"
    CsvCandidate -> "main"
    CsvStop      -> "stop"
82 ------------------------------------------------------------------------
-- | Decoding options: cassava's defaults with the field delimiter replaced
-- by 'csvListFieldDelimiter'.
csvDecodeOptions :: DecodeOptions
csvDecodeOptions =
  defaultDecodeOptions { decDelimiter = delimiterByte }
  where
    -- The option is set from the delimiter's code point.
    delimiterByte = fromIntegral (ord csvListFieldDelimiter)
87
-- | Encoding options: cassava's defaults with the field delimiter replaced
-- by 'csvListFieldDelimiter', mirroring 'csvDecodeOptions'.
csvEncodeOptions :: EncodeOptions
csvEncodeOptions =
  defaultEncodeOptions { encDelimiter = delimiterByte }
  where
    -- The option is set from the delimiter's code point.
    delimiterByte = fromIntegral (ord csvListFieldDelimiter)
92 ------------------------------------------------------------------------
-- | Read a term-list file and decode it by column name using
-- 'csvDecodeOptions'.
--
-- Returns the header together with the decoded rows. A decoding failure is
-- not returned to the caller: the decoder's message is passed to 'panic'.
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
fromCsvListFile path = do
  contents <- BL.readFile path
  let decoded = decodeByNameWith csvDecodeOptions contents
  case decoded of
    Right parsed -> pure parsed
    Left  err    -> panic (pack err)
99 ------------------------------------------------------------------------
-- | Write a header and its rows to a file, encoded by column name with
-- 'csvEncodeOptions'.
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
toCsvListFile path (header, rows) =
  BL.writeFile path (encodeByNameWith csvEncodeOptions header (V.toList rows))
103 ------------------------------------------------------------------------