{-|
Module      : Gargantext.Core.Text.Corpus.Parsers.RIS
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

RIS is a standardized tag format developed by Research Information
Systems, Incorporated (the format name refers to the company) to enable
citation programs to exchange data.

[More](https://en.wikipedia.org/wiki/RIS_(file_format))
-}
20 module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
22 import Data.List (lookup)
23 import Control.Applicative
24 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
25 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
26 import Data.ByteString (ByteString, intercalate)
27 import Gargantext.Prelude hiding (takeWhile, take)
28 import qualified Data.List as DL
29 -------------------------------------------------------------
31 parser :: Parser [[(ByteString, ByteString)]]
34 ns <- many1 (notice "\nTY -")
-- | Parse a single RIS notice (one bibliographic record).
--
-- The argument @s@ recognises the marker that opens the record; the remainder
-- of that line is consumed and discarded. Each following tagged line is parsed
-- with 'fieldWith' and collected, in order, as a @(tag, value)@ pair. The
-- notice stops at the closing @ER@ line, whose remainder is discarded too.
--
-- NOTE(review): the RIS format separates a tag from its value with two spaces,
-- a hyphen and a space. The literals below carry a single space before the
-- hyphen -- confirm against real input that no whitespace was lost.
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = start *> many (fieldWith field) <* end
  where
    -- A field tag: a newline, exactly two bytes, then the separator.
    -- The @ER@ tag is rejected here: without this, an end-of-record line with
    -- a trailing space after the hyphen would be swallowed as an ordinary
    -- field by 'many', and 'end' would then fail to find its marker.
    field :: Parser ByteString
    field = do
      tag <- "\n" *> take 2 <* " - "
      if tag == "ER" then empty else pure tag

    -- Opening marker supplied by the caller, then the rest of that line.
    start :: Parser ByteString
    start = s *> takeTill isEndOfLine

    -- Closing @ER@ marker, then the rest of that line.
    end :: Parser ByteString
    end = "\nER -" *> takeTill isEndOfLine
50 fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
53 txt <- takeTill isEndOfLine
55 let txts' = case DL.length txts > 0 of
58 pure (name, intercalate ";" ([txt] <> txts'))
61 lines :: Parser [ByteString]
64 line :: Parser ByteString
65 line = "\n " *> takeTill isEndOfLine
67 -------------------------------------------------------------
-- | Append to an association list the pairs derived from one of its fields.
--
-- 'lookup' finds the value of the first pair keyed by @k@ in @m@. When such a
-- pair exists, @f@ is applied to its value and the resulting pairs are added
-- after the original entries; otherwise nothing is added to @m@.
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
        -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = m <> derived
  where
    derived = case lookup k m of
      Nothing -> []
      Just v  -> f v