2 Module : Gargantext.Text.Corpus.Parsers.RIS
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
22 module Gargantext.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
24 import Data.List (lookup)
25 import Control.Applicative
26 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
27 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
28 import Data.ByteString (ByteString, concat)
29 import Data.Monoid ((<>))
30 import Gargantext.Prelude hiding (takeWhile, take)
31 import qualified Data.List as DL
32 -------------------------------------------------------------
34 parser :: Parser [[(ByteString, ByteString)]]
37 ns <- many1 (notice "\nTY -")
40 notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
41 notice s = start *> many (fieldWith field) <* end
43 field :: Parser ByteString
44 field = "\n" *> take 2 <* " - "
46 start :: Parser ByteString
47 start = s *> takeTill isEndOfLine
49 end :: Parser ByteString
50 end = "\nER -" *> takeTill isEndOfLine
53 fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
56 txt <- takeTill isEndOfLine
58 let txts' = case DL.length txts > 0 of
61 pure (name, concat ([txt] <> txts'))
64 lines :: Parser [ByteString]
67 line :: Parser ByteString
68 line = "\n " *> takeTill isEndOfLine
70 -------------------------------------------------------------
71 -- Field for First elem of a Tuple, Key for corresponding Map
72 onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
73 -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
74 onField k f m = m <> ( maybe [] f (lookup k m) )