]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/RIS.hs
[PARSERS] RIS PRESSE fix bug of \r
[gargantext.git] / src / Gargantext / Text / Parsers / RIS.hs
1 {-|
2 Module : Gargantext.Text.Parsers.RIS
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
14
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
16
17 -}
18
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21
22 module Gargantext.Text.Parsers.RIS (risParser, withField) where
23
24 import Data.Either (either)
25 import Data.List (lookup)
26 import Data.Tuple.Extra (first)
27 import Control.Applicative
28 import Data.Attoparsec.ByteString (Parser, try, string, takeTill, take, manyTill, many1, endOfInput, parseOnly)
29 import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
30 import Data.ByteString (ByteString, concat, length)
31 import Data.ByteString.Char8 (pack)
32 import Data.Monoid ((<>))
33 import Gargantext.Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
34 import qualified Data.List as DL
35 -------------------------------------------------------------
36
37 risParser :: Parser [[(ByteString, ByteString)]]
38 risParser = do
39 n <- notice "TY -"
40 ns <- many1 (notice "\nTY -")
41 pure $ [n] <> ns
42
43 notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
44 notice s = start *> many field <* end
45 where
46 start :: Parser ByteString
47 start = s *> takeTill isEndOfLine
48
49 end :: Parser ByteString
50 end = "\nER -" *> takeTill isEndOfLine
51
52 field :: Parser (ByteString, ByteString)
53 field = do
54 name <- "\n" *> take 2 <* " - "
55 txt <- takeTill isEndOfLine
56 txts <- try lines
57 let txts' = case DL.length txts > 0 of
58 True -> txts
59 False -> []
60 pure (name, concat ([txt] <> txts'))
61
62 lines :: Parser [ByteString]
63 lines = many line
64 where
65 line :: Parser ByteString
66 line = "\n " *> takeTill isEndOfLine
67
68 -------------------------------------------------------------
69 withField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
70 -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
71 withField k f m = m <> ( maybe [] f (lookup k m) )
72