]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/Parsers/RIS.hs
[ELEVE] mainEleve' with a witness corpus
[gargantext.git] / src / Gargantext / Text / Corpus / Parsers / RIS.hs
1 {-|
2 Module : Gargantext.Text.Corpus.Parsers.RIS
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
14
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
16
17 -}
18
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21
22 module Gargantext.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
23
24 import Data.List (lookup)
25 import Control.Applicative
26 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
27 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
28 import Data.ByteString (ByteString, concat)
29 import Data.Monoid ((<>))
30 import Gargantext.Prelude hiding (takeWhile, take)
31 import qualified Data.List as DL
32 -------------------------------------------------------------
33
34 parser :: Parser [[(ByteString, ByteString)]]
35 parser = do
36 n <- notice "TY -"
37 ns <- many1 (notice "\nTY -")
38 pure $ [n] <> ns
39
40 notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
41 notice s = start *> many (fieldWith field) <* end
42 where
43 field :: Parser ByteString
44 field = "\n" *> take 2 <* " - "
45
46 start :: Parser ByteString
47 start = s *> takeTill isEndOfLine
48
49 end :: Parser ByteString
50 end = "\nER -" *> takeTill isEndOfLine
51
52
53 fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
54 fieldWith n = do
55 name <- n
56 txt <- takeTill isEndOfLine
57 txts <- try lines
58 let txts' = case DL.length txts > 0 of
59 True -> txts
60 False -> []
61 pure (name, concat ([txt] <> txts'))
62
63
64 lines :: Parser [ByteString]
65 lines = many line
66 where
67 line :: Parser ByteString
68 line = "\n " *> takeTill isEndOfLine
69
70 -------------------------------------------------------------
71 -- Field for First elem of a Tuple, Key for corresponding Map
72 onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
73 -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
74 onField k f m = m <> ( maybe [] f (lookup k m) )
75