]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/List/CSV.hs
[Ngrams Table] Front and Back implemented (but need next step for optimization and...
[gargantext.git] / src / Gargantext / Text / List / CSV.hs
1 {-|
2 Module : Gargantext.Text.List.CSV
3 Description :
4 Copyright : (c) CNRS, 2018-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 CSV parser for Gargantext corpus files.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE DeriveGeneric #-}
17
18 module Gargantext.Text.List.CSV where
19
20 import GHC.IO (FilePath)
21
22 import Control.Applicative
23 import Control.Monad (mzero)
24
25 import Data.Char (ord)
26 import Data.Csv
27 import Data.Either (Either(Left, Right))
28 import Data.List (null)
29 import Data.Text (Text, pack)
30 import qualified Data.Text as DT
31 import qualified Data.ByteString.Lazy as BL
32
33 import Data.Vector (Vector)
34 import qualified Data.Vector as V
35
36 import Gargantext.Prelude hiding (length)
37 import Gargantext.Text.Context
38
39 ------------------------------------------------------------------------
40
41 csvGraphTermList :: FilePath -> IO TermList
42 csvGraphTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
43
44 csv2list :: CsvListType -> Vector CsvList -> TermList
45 csv2list lt vs = V.toList $ V.map (\(CsvList _ label forms)
46 -> (DT.words label, filter (not . null) . map DT.words $ DT.splitOn csvListFormsDelimiter forms))
47 $ V.filter (\l -> csvList_status l == lt ) vs
48
49 ------------------------------------------------------------------------
50 data CsvListType = CsvMap | CsvStop | CsvCandidate
51 deriving (Read, Show, Eq)
52 ------------------------------------------------------------------------
53 -- CSV List Main Configuration
54 csvListFieldDelimiter :: Char
55 csvListFieldDelimiter = '\t'
56
57 csvListFormsDelimiter :: Text
58 csvListFormsDelimiter = "|&|"
59 ------------------------------------------------------------------------
60 data CsvList = CsvList
61 { csvList_status :: !CsvListType
62 , csvList_label :: !Text
63 , csvList_forms :: !Text
64 }
65 deriving (Show)
66 ------------------------------------------------------------------------
67 instance FromNamedRecord CsvList where
68 parseNamedRecord r = CsvList <$> r .: "status"
69 <*> r .: "label"
70 <*> r .: "forms"
71
72 instance ToNamedRecord CsvList where
73 toNamedRecord (CsvList s l f) =
74 namedRecord [ "status" .= s
75 , "label" .= l
76 , "forms" .= f
77 ]
78 ------------------------------------------------------------------------
79 instance FromField CsvListType where
80 parseField "map" = pure CsvMap
81 parseField "main" = pure CsvCandidate
82 parseField "stop" = pure CsvStop
83 parseField _ = mzero
84
85 instance ToField CsvListType where
86 toField CsvMap = "map"
87 toField CsvCandidate = "main"
88 toField CsvStop = "stop"
89 ------------------------------------------------------------------------
90 csvDecodeOptions :: DecodeOptions
91 csvDecodeOptions = (defaultDecodeOptions
92 {decDelimiter = fromIntegral $ ord csvListFieldDelimiter}
93 )
94
95 csvEncodeOptions :: EncodeOptions
96 csvEncodeOptions = ( defaultEncodeOptions
97 {encDelimiter = fromIntegral $ ord csvListFieldDelimiter}
98 )
99 ------------------------------------------------------------------------
100 fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
101 fromCsvListFile fp = do
102 csvData <- BL.readFile fp
103 case decodeByNameWith csvDecodeOptions csvData of
104 Left e -> panic (pack e)
105 Right csvList -> pure csvList
106 ------------------------------------------------------------------------
107 toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
108 toCsvListFile fp (h, vs) = BL.writeFile fp $
109 encodeByNameWith csvEncodeOptions h (V.toList vs)
110 ------------------------------------------------------------------------