2 Module : Gargantext.Core.Text.Corpus.Parsers.RIS
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
19 module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
21 import Data.List (lookup)
22 import Control.Applicative
23 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
24 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
25 import Data.ByteString (ByteString, intercalate)
26 import Gargantext.Prelude hiding (takeWhile, take)
27 import qualified Data.List as DL
29 -------------------------------------------------------------
-- | Top-level parser. Per its signature it yields a list of notices, each
-- notice being a list of (name, value) 'ByteString' pairs.
--
-- NOTE(review): the defining equation and the remaining statements of this
-- definition are not visible in this listing; only the step below is described.
30 parser :: Parser [[(ByteString, ByteString)]]
-- One or more notices, each opened by a newline followed by the type marker.
-- NOTE(review): RIS tags are normally followed by two spaces before the
-- hyphen; confirm the spacing inside this literal against the original file.
33 ns <- many1 (notice "\nTY -")
-- | Parse a single notice: the opening line (@start@), then zero or more
-- fields, then the closing line (@end@). Only the list of fields is returned;
-- whatever the opening and closing lines consume is discarded.
--
-- The argument @s@ is the parser for the marker that opens the notice.
36 notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
37 notice s = start *> many (fieldWith field) <* end
-- NOTE(review): @start@ refers to @s@, so the helpers below are presumably
-- local to 'notice'; the line introducing them is not visible in this listing.
--
-- Field name: a newline, then two bytes kept as the name, then the separator.
-- NOTE(review): confirm the spacing inside the separator literal (RIS tags are
-- normally followed by two spaces before the hyphen).
39 field :: Parser ByteString
40 field = "\n" *> take 2 <* " - "
-- Opening line: the marker @s@, then everything up to the end of that line.
42 start :: Parser ByteString
43 start = s *> takeTill isEndOfLine
-- Closing line: the end marker, then everything up to the end of that line.
45 end :: Parser ByteString
46 end = "\nER -" *> takeTill isEndOfLine
-- | Parse one field and yield it as a (name, value) pair. The argument is a
-- 'ByteString' parser, presumably the one that recognises the field name.
--
-- NOTE(review): the defining equation, the bindings of @name@ and @txts@ and
-- the branches of the @case@ below are not visible in this listing.
49 fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
-- Everything up to the end of the current line.
52 txt <- takeTill isEndOfLine
-- Branches on whether @txts@ is non-empty (the branches are not visible here).
54 let txts' = case DL.length txts > 0 of
-- The value is @txt@ followed by the entries of @txts'@, joined with ";".
57 pure (name, intercalate ";" ([txt] <> txts'))
-- | Parser yielding a list of 'ByteString's.
--
-- NOTE(review): the defining equation is not visible in this listing;
-- presumably it repeats @line@ below. Confirm against the original file.
60 lines :: Parser [ByteString]
-- One extra line: the literal prefix, then everything up to the end of line.
-- NOTE(review): whitespace inside the literal may differ in the original file.
63 line :: Parser ByteString
64 line = "\n " *> takeTill isEndOfLine
66 -------------------------------------------------------------
-- | Extend an association list with pairs derived from one of its own fields.
--
-- The value of the first pair whose key equals @k@ is passed to @f@, and the
-- pairs @f@ returns are appended after the original entries of @m@. When no
-- pair has key @k@, the result is @m@ unchanged.
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
       -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = m <> derived
  where
    -- Kept as a lazy binding so the original entries are produced before the
    -- lookup is forced.
    derived = case lookup k m of
      Just value -> f value
      Nothing    -> []