{-|
Module      : Gargantext.Text.Parsers.RIS
Description : Attoparsec parser for RIS bibliographic records
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

RIS is a standardized tag format developed by Research Information
Systems, Incorporated (the format name refers to the company) to enable
citation programs to exchange data.

[More](https://en.wikipedia.org/wiki/RIS_(file_format))

-}

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

module Gargantext.Text.Parsers.RIS (risParser, withField, fieldWith, lines) where

import Data.Either (either)
import Data.List (lookup)
import Data.Tuple.Extra (first)
import Control.Applicative
import Data.Attoparsec.ByteString (Parser, try, string, takeTill, take, manyTill, many1, endOfInput, parseOnly)
import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
import Data.ByteString (ByteString, concat, length)
import Data.ByteString.Char8 (pack)
import Data.Monoid ((<>))
import Gargantext.Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
import qualified Data.List as DL

-------------------------------------------------------------

-- | Parse one or more RIS records.
--
-- The first record must begin at the current input position with its
-- @TY@ line; every following record must begin on a new line. Each
-- record is returned as its list of @(tag, value)@ pairs, in input order.
-- The opening @TY@ line and the closing @ER@ line of a record are
-- consumed but not part of the result.
risParser :: Parser [[(ByteString, ByteString)]]
risParser = do
    n  <- notice "TY -"
    -- 'many', not 'many1': the first record is already parsed above, so a
    -- document holding exactly one record must be accepted as well.
    ns <- many (notice "\nTY -")
    pure (n : ns)

-- | Parse a single record.
--
-- The argument is the parser for the record's opening marker; the rest
-- of that opening line is skipped. Then zero or more fields are read,
-- and finally the closing @ER@ line is consumed.
--
-- NOTE(review): the RIS specification writes a tag as two characters,
-- two spaces, a hyphen and a space (e.g. @TY@, two spaces, @- @).
-- Confirm that the spacing inside the literals used by this module is
-- the intended one.
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = start *> many (fieldWith field) <* end
    where
      -- Tag of a field: a newline, any two bytes, then the separator.
      -- Only a bare "\n" is accepted here, while 'takeTill isEndOfLine'
      -- also stops on a carriage return, so "\r\n" line endings are not
      -- matched by this parser.
      field :: Parser ByteString
      field = "\n" *> take 2 <* " - "

      -- Opening marker followed by the remainder of its line.
      start :: Parser ByteString
      start = s *> takeTill isEndOfLine

      -- Closing marker followed by the remainder of its line.
      end :: Parser ByteString
      end = "\nER -" *> takeTill isEndOfLine

-- | Parse one field given a parser for its tag.
--
-- Returns the tag produced by the argument paired with the field value:
-- the rest of the current line followed by every continuation line (see
-- 'lines'), concatenated without any separator.
fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
    name <- n
    txt  <- takeTill isEndOfLine
    -- 'lines' is built with 'many' and therefore yields [] instead of
    -- failing; no 'try' wrapper or emptiness check is required.
    txts <- lines
    pure (name, concat (txt : txts))

-- | Parse zero or more continuation lines.
--
-- A continuation line is a newline immediately followed by a space; the
-- text after that space, up to the end of the line, is returned.
lines :: Parser [ByteString]
lines = many line
    where
      line :: Parser ByteString
      line = "\n " *> takeTill isEndOfLine
-------------------------------------------------------------

-- | Extend an association list with entries derived from one of its keys.
--
-- The first value stored under @k@ in @m@ (if any) is passed to @f@ and
-- the pairs it produces are appended after the existing entries. When
-- @k@ is absent, @m@ is returned with nothing appended.
withField :: ByteString
          -> (ByteString -> [(ByteString, ByteString)])
          -> [(ByteString, ByteString)]
          -> [(ByteString, ByteString)]
withField k f m = m <> derived
    where
      -- Pairs computed from the value found under @k@; empty if missing.
      derived = maybe [] f (lookup k m)