2 Module : Gargantext.Core.Text.List.CSV
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 CSV parser and writer for Gargantext term-list files.
15 module Gargantext.Core.Text.List.CSV where
17 import GHC.IO (FilePath)
19 import Control.Applicative
20 import Control.Monad (mzero)
22 import Data.Char (ord)
24 import Data.Either (Either(Left, Right))
25 import Data.List (null)
26 import Data.Text (Text, pack)
27 import qualified Data.Text as DT
28 import qualified Data.ByteString.Lazy as BL
30 import Data.Vector (Vector)
31 import qualified Data.Vector as V
33 import Gargantext.Prelude hiding (length)
34 import Gargantext.Core.Text.Context
36 ------------------------------------------------------------------------
-- | Load the list CSV file at @fp@ and return the rows whose status is
-- 'CsvMap', converted to a 'TermList'.
--
-- The CSV header returned by 'fromCsvListFile' is discarded.
csvMapTermList :: FilePath -> IO TermList
csvMapTermList fp = fmap mapTerms (fromCsvListFile fp)
  where
    -- Drop the header, then keep and convert only the map-status rows.
    mapTerms = csv2list CsvMap . snd
-- | Convert the rows whose status equals @lt@ into a 'TermList'.
--
-- Each kept row yields a pair: the label split into words, and a list made of
-- the label's words followed by every form of the row. Forms are obtained by
-- splitting the forms field on 'csvListFormsDelimiter' and splitting each
-- piece into words; pieces that contain no word are dropped.
csv2list :: CsvListType -> Vector CsvList -> TermList
csv2list lt vs = [ toEntry row | row <- V.toList vs, csvList_status row == lt ]
  where
    toEntry (CsvList _ label forms) = (labelWords, labelWords : formWords)
      where
        labelWords = DT.words label
        formWords  = [ ws
                     | form <- DT.splitOn csvListFormsDelimiter forms
                     , let ws = DT.words form
                     , not (null ws)
                     ]
46 ------------------------------------------------------------------------
-- | Status of a row in a list CSV file.
data CsvListType
  = CsvMap        -- ^ written to and read from CSV as @map@
  | CsvStop       -- ^ written to and read from CSV as @stop@
  | CsvCandidate  -- ^ written to and read from CSV as @main@
  deriving (Eq, Read, Show)
49 ------------------------------------------------------------------------
50 -- CSV List Main Configuration
-- | Field delimiter of the list CSV files: the tab character. It is used by
-- both 'csvDecodeOptions' and 'csvEncodeOptions'.
51 csvListFieldDelimiter :: Char
52 csvListFieldDelimiter = '\t'
-- | Delimiter separating the individual forms inside a row's forms field;
-- 'csv2list' splits the field on it.
54 csvListFormsDelimiter :: Text
55 csvListFormsDelimiter = "|&|"
56 ------------------------------------------------------------------------
-- | One row of a list CSV file. All fields are strict.
57 data CsvList = CsvList
58 { csvList_status :: !CsvListType -- ^ status of the row; 'csv2list' filters on it
59 , csvList_label :: !Text -- ^ label; 'csv2list' splits it into words
60 , csvList_forms :: !Text -- ^ forms, separated by 'csvListFormsDelimiter'
63 ------------------------------------------------------------------------
-- | Decodes a 'CsvList' from a named CSV record; the status is read from the
-- @status@ column.
64 instance FromNamedRecord CsvList where
65 parseNamedRecord r = CsvList <$> r .: "status"
-- | Encodes a 'CsvList' as a named CSV record; the status is stored under the
-- @status@ column.
69 instance ToNamedRecord CsvList where
70 toNamedRecord (CsvList s l f) =
71 namedRecord [ "status" .= s
75 ------------------------------------------------------------------------
-- | Parses a status cell into a 'CsvListType'.
--
-- Recognised values are @map@ ('CsvMap'), @main@ ('CsvCandidate') and @stop@
-- ('CsvStop'). Any other value makes the field parser fail, so the problem is
-- reported as a decoding error instead of a pattern-match exception.
instance FromField CsvListType where
  parseField "map"  = pure CsvMap
  parseField "main" = pure CsvCandidate
  parseField "stop" = pure CsvStop
  -- Unknown status: fail the parser rather than crash on a missing pattern.
  parseField _      = mzero
-- | Serialises a 'CsvListType' to its CSV cell value: @map@, @main@ or @stop@.
instance ToField CsvListType where
  toField status = case status of
    CsvMap       -> "map"
    CsvCandidate -> "main"
    CsvStop      -> "stop"
86 ------------------------------------------------------------------------
-- | Decoding options: the defaults with the field delimiter overridden by
-- 'csvListFieldDelimiter'.
87 csvDecodeOptions :: DecodeOptions
88 csvDecodeOptions = (defaultDecodeOptions
89 {decDelimiter = fromIntegral $ ord csvListFieldDelimiter}
-- | Encoding options: the defaults with the field delimiter overridden by
-- 'csvListFieldDelimiter'.
92 csvEncodeOptions :: EncodeOptions
93 csvEncodeOptions = ( defaultEncodeOptions
94 {encDelimiter = fromIntegral $ ord csvListFieldDelimiter}
96 ------------------------------------------------------------------------
-- | Read the file at @fp@ as a lazy 'BL.ByteString' and decode it by column
-- name using 'csvDecodeOptions'.
--
-- Returns the CSV header together with the decoded rows. If decoding fails,
-- the decoder's error message is passed to 'panic'.
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
fromCsvListFile fp =
  BL.readFile fp >>= either (panic . pack) pure . decodeByNameWith csvDecodeOptions
103 ------------------------------------------------------------------------
-- | Encode the rows @vs@ by column name under header @h@, using
-- 'csvEncodeOptions', and write the result to the file at @fp@.
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
toCsvListFile fp (h, vs) = BL.writeFile fp encoded
  where
    encoded = encodeByNameWith csvEncodeOptions h (V.toList vs)
107 ------------------------------------------------------------------------