2 Module : Gargantext.Text.Corpus.Parsers.RIS
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
20 module Gargantext.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
22 import Data.List (lookup)
23 import Control.Applicative
24 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
25 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
26 import Data.ByteString (ByteString, concat)
27 import Data.Monoid ((<>))
28 import Gargantext.Prelude hiding (takeWhile, take)
29 import qualified Data.List as DL
30 -------------------------------------------------------------
-- | Top-level parser. Per its type it yields a list of notices, where each
-- notice is a list of (field name, field text) pairs.
-- NOTE(review): only part of this definition is visible in this listing
-- (the defining equation and the final result expression are not shown).
32 parser :: Parser [[(ByteString, ByteString)]]
-- One or more notices whose start marker is preceded by a newline.
35 ns <- many1 (notice "\nTY -")
-- | Parse a single notice: a start line introduced by the marker parser @s@,
-- then zero or more fields (see 'fieldWith'), then the end line. Only the
-- list of fields is returned; the start and end results are discarded.
38 notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
39 notice s = start *> many (fieldWith field) <* end
-- NOTE(review): 'start' below uses @s@, so these helpers are presumably local
-- to 'notice' (the introducing `where` line is not visible here) — confirm.
-- Field name: a newline, then exactly two bytes (the returned value), then
-- the separator literal.
41 field :: Parser ByteString
42 field = "\n" *> take 2 <* " - "
-- Start of a notice: the caller-supplied marker, then the rest of that line.
44 start :: Parser ByteString
45 start = s *> takeTill isEndOfLine
-- End of a notice: the end-of-record marker, then the rest of that line.
47 end :: Parser ByteString
48 end = "\nER -" *> takeTill isEndOfLine
-- | Parse one field as a (name, text) pair.
-- NOTE(review): the lines binding @name@ and @txts@, and the alternatives of
-- the @case@ below, are not visible in this listing; presumably @name@ comes
-- from the parser argument and @txts@ from continuation lines — confirm.
51 fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
-- First chunk of the field text: everything up to the next end-of-line byte.
54 txt <- takeTill isEndOfLine
-- Branch on whether any extra chunks were collected.
56 let txts' = case DL.length txts > 0 of
-- The first chunk and the extra chunks are joined with 'concat', i.e. with
-- no separator inserted between them.
59 pure (name, concat ([txt] <> txts'))
-- | Parser yielding a list of 'ByteString' chunks.
-- NOTE(review): the defining equation of 'lines' is not visible in this
-- listing; presumably it repeats 'line' below — confirm.
62 lines :: Parser [ByteString]
-- One chunk: the literal prefix (a newline followed by a space), then
-- everything up to the next end-of-line byte; only that remainder is returned.
65 line :: Parser ByteString
66 line = "\n " *> takeTill isEndOfLine
68 -------------------------------------------------------------
-- | Extend an association list with entries derived from one of its fields.
--
-- The key @k@ is looked up among the first components of the pairs in @m@
-- (the first match is used). When it is found, the pairs produced by applying
-- @f@ to the associated value are appended after the existing entries; when
-- it is not found, nothing is appended.
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
        -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = m <> derived
  where
    -- Entries generated from the value stored under @k@, if any.
    derived = case lookup k m of
      Nothing    -> []
      Just value -> f value