]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
[FEAT] Iramuteq parser (WIP)
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / RIS.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.Parsers.RIS
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
14
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
16
17 -}
18
19 module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
20
21 import Data.List (lookup)
22 import Control.Applicative
23 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
24 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
25 import Data.ByteString (ByteString, intercalate)
26 import Gargantext.Prelude hiding (takeWhile, take)
27 import qualified Data.List as DL
28
29 -------------------------------------------------------------
-- | Parse a whole RIS document into its records.
--
-- Every record is the list of @(tag, value)@ pairs returned by 'notice'.
-- At least one record is required.  The previous definition parsed one
-- record and then demanded one or more further records, so a document
-- holding exactly one record was rejected; it is now accepted.  Inputs that
-- parsed before yield the same result.
--
-- A record start is either @TY -@ (first record of the document) or the same
-- marker preceded by the line break that the previous record's @ER -@ line
-- leaves unconsumed.  As a side effect a single line break in front of the
-- first record is tolerated as well.
--
-- NOTE(review): the RIS specification writes tags as two letters, two
-- spaces, a hyphen and a space; the literals here contain single spaces.
-- Confirm they match the files this parser is fed.
parser :: Parser [[(ByteString, ByteString)]]
parser = many1 (notice recordStart)
  where
    -- The two alternatives start with different bytes, so their order is
    -- irrelevant; attoparsec backtracks when the first one fails.
    recordStart :: Parser ByteString
    recordStart = "\nTY -" <|> "TY -"
35
-- | Parse a single record.
--
-- The argument @s@ recognises the marker that opens the record.  The rest of
-- that opening line is read and thrown away, then zero or more fields are
-- collected with 'fieldWith', and finally the closing @ER -@ line is read and
-- thrown away.  Only the collected fields are returned.
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = do
  _      <- s *> takeTill isEndOfLine
  fields <- many (fieldWith tag)
  _      <- "\nER -" *> takeTill isEndOfLine
  pure fields
  where
    -- A field name: a line break, two bytes that form the tag, then the
    -- separator.  Only the two tag bytes are kept.
    tag :: Parser ByteString
    tag = do
      _    <- "\n"
      code <- take 2
      _    <- " - "
      pure code
47
48
-- | Parse one field as a @(name, value)@ pair.
--
-- The name is whatever the supplied parser @n@ returns.  The value is the
-- remainder of the current line followed by every continuation line that
-- 'lines' recognises; the pieces are joined with @;@.
fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
  name         <- n
  firstLine    <- takeTill isEndOfLine
  continuation <- try lines
  let extra
        | DL.null continuation = []
        | otherwise            = continuation
  pure (name, intercalate ";" (firstLine : extra))
58
59
-- | Parse zero or more continuation lines of a field value.
--
-- A continuation line is a line break followed by a space; the text after
-- that prefix, up to the next end-of-line byte, is returned for each line.
lines :: Parser [ByteString]
lines = many continuation
  where
    continuation :: Parser ByteString
    continuation = do
      _ <- "\n "
      takeTill isEndOfLine
65
66 -------------------------------------------------------------
-- | Extend an association list with pairs derived from one of its fields.
--
-- The key @k@ is looked up among the first components of @m@.  When it is
-- present, @f@ is applied to the first matching value and the pairs it
-- returns are appended after the existing ones.  When it is absent, @m@ is
-- returned as it is.
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
        -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m =
  case lookup k m of
    Nothing    -> m
    Just value -> m <> f value
71