]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/Parsers/RIS.hs
[refactoring] add some default extensions to package.yaml
[gargantext.git] / src / Gargantext / Text / Corpus / Parsers / RIS.hs
1 {-|
2 Module : Gargantext.Text.Corpus.Parsers.RIS
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
14
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
16
17 -}
18
19
20 module Gargantext.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
21
22 import Data.List (lookup)
23 import Control.Applicative
24 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
25 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
26 import Data.ByteString (ByteString, concat)
27 import Data.Monoid ((<>))
28 import Gargantext.Prelude hiding (takeWhile, take)
29 import qualified Data.List as DL
30 -------------------------------------------------------------
31
-- | Parse a whole RIS document into its records.
--
-- Every record is returned as the ordered list of its @(tag, value)@ fields.
-- The record-opening tag is accepted either directly at the current position
-- (first record) or after a newline (following records), so a document that
-- holds a single record parses as well; at least one record is still required.
--
-- NOTE(review): the RIS format writes tags as @"TY  - "@ (two spaces before
-- the hyphen); confirm that the literals below match the files being ingested.
parser :: Parser [[(ByteString, ByteString)]]
parser = many1 (notice ("\nTY -" <|> "TY -"))
37
-- | Parse a single record.
--
-- The argument @s@ recognises the record's opening tag; the remainder of that
-- opening line is consumed and discarded. The fields that follow are collected
-- until the closing @ER@ line, whose remainder is likewise discarded.
--
-- NOTE(review): the field separator literal is @" - "@ as written here; the RIS
-- format uses two spaces before the hyphen, so confirm against real input.
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = do
    _      <- s *> takeTill isEndOfLine
    fields <- many (fieldWith tag)
    _      <- "\nER -" *> takeTill isEndOfLine
    pure fields
  where
    -- A field tag: newline, two bytes of tag name, then the separator.
    tag :: Parser ByteString
    tag = "\n" *> take 2 <* " - "
49
50
-- | Parse one field: its name, as recognised by the supplied parser @n@, and
-- its value.
--
-- The value is the rest of the current line followed by every continuation
-- line accepted by 'lines'; the pieces are concatenated without a separator.
fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
    name <- n
    txt  <- takeTill isEndOfLine
    -- 'lines' is built on 'many', so it yields [] when there is no
    -- continuation line; no emptiness check is needed before concatenating.
    txts <- try lines
    pure (name, concat (txt : txts))
60
61
-- | Parse zero or more continuation lines of a field value.
--
-- A continuation line is introduced by the literal below (a newline followed
-- by a space); its content runs up to, but not including, the next end of line.
lines :: Parser [ByteString]
lines = many continuation
  where
    continuation :: Parser ByteString
    continuation = do
        _ <- "\n "
        takeTill isEndOfLine
67
68 -------------------------------------------------------------
-- | Extend a record with extra fields derived from one of its existing fields.
--
-- The record @m@ is an association list of @(tag, value)@ pairs. The first
-- value stored under tag @k@ is passed to @f@ and the pairs it returns are
-- appended to the record. When @k@ is absent the record is returned unchanged.
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
       -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = case lookup k m of
    Nothing -> m
    Just v  -> m <> f v
73