2 Module : Gargantext.Text.Parsers.RIS
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
22 module Gargantext.Text.Parsers.RIS (risParser, withField) where
24 import Data.Either (either)
25 import Data.List (lookup)
26 import Data.Tuple.Extra (first)
27 import Control.Applicative
28 import Data.Attoparsec.ByteString (Parser, try, string, takeTill, take, manyTill, many1, endOfInput, parseOnly)
29 import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
30 import Data.ByteString (ByteString, concat, length)
31 import Data.ByteString.Char8 (pack)
32 import Data.Monoid ((<>))
33 import Gargantext.Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
34 import qualified Data.List as DL
35 -------------------------------------------------------------
37 risParser :: Parser [[(ByteString, ByteString)]]
40 ns <- many1 (notice "\nTY -")
43 notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
44 notice s = start *> many field <* end
46 start :: Parser ByteString
47 start = s *> takeTill isEndOfLine
49 end :: Parser ByteString
50 end = "\nER -" *> takeTill isEndOfLine
52 field :: Parser (ByteString, ByteString)
54 name <- "\n" *> take 2 <* " - "
55 txt <- takeTill isEndOfLine
57 let txts' = case DL.length txts > 0 of
60 pure (name, concat ([txt] <> txts'))
62 lines :: Parser [ByteString]
65 line :: Parser ByteString
66 line = "\n " *> takeTill isEndOfLine
68 -------------------------------------------------------------
69 withField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
70 -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
71 withField k f m = m <> ( maybe [] f (lookup k m) )