]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
[FIX] Ngrams List size with candidates
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / RIS.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.Parsers.RIS
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10
11 RIS is a standardized tag format developed by Research Information
12 Systems, Incorporated (the format name refers to the company) to enable
13 citation programs to exchange data.
14
15 [More](https://en.wikipedia.org/wiki/RIS_(file_format))
16
17 -}
18
19
20 module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
21
22 import Data.List (lookup)
23 import Control.Applicative
24 import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
25 import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
26 import Data.ByteString (ByteString, concat)
27 import Gargantext.Prelude hiding (takeWhile, take)
28 import qualified Data.List as DL
29 -------------------------------------------------------------
30
-- | Parse a whole RIS document into its records; each record is the list
-- of @(tag, value)@ pairs produced by 'notice'.
--
-- The first record opens with @TY -@ at the current position and every
-- later one with the same marker at the start of a new line.  A document
-- holding a single record is accepted as well (previously at least two
-- records were required).  As a side effect, a newline in front of the
-- first record is tolerated.
parser :: Parser [[(ByteString, ByteString)]]
parser = many1 (notice recordStart)
  where
    -- Try the newline-prefixed marker first so that records following
    -- another one are recognised; fall back to the bare marker for the
    -- record sitting at the very beginning of the input.
    recordStart :: Parser ByteString
    recordStart = "\nTY -" <|> "TY -"
36
-- | Parse one record: the opening marker @s@ together with the rest of
-- its line, then zero or more tagged fields, then the closing @ER -@
-- line.  Only the fields are returned; whatever text sits on the opening
-- and closing lines is discarded.
--
-- NOTE(review): @tag@ accepts any two bytes, so a closing line written
-- with a trailing space after the hyphen would be consumed as a field
-- and @closing@ could then no longer match -- confirm against real input.
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = do
  _ <- opening
  fields <- many (fieldWith tag)
  _ <- closing
  pure fields
  where
    -- A field tag: a newline, two bytes of tag name, then the separator.
    tag :: Parser ByteString
    tag = do
      _ <- "\n"
      name <- take 2
      _ <- " - "
      pure name

    -- The caller-supplied marker followed by the remainder of its line.
    opening :: Parser ByteString
    opening = s >> takeTill isEndOfLine

    -- The end-of-record marker followed by the remainder of its line.
    closing :: Parser ByteString
    closing = "\nER -" >> takeTill isEndOfLine
48
49
-- | Parse one field: run @n@ to obtain the field name, read the rest of
-- the current line as the start of the value, then append any
-- continuation lines recognised by 'lines'.  The pieces are joined
-- without a separator.
fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
  name <- n
  firstLine <- takeTill isEndOfLine
  -- 'lines' is built on 'many', so it yields [] when nothing follows.
  continuation <- try lines
  let value
        -- Constant-time emptiness check: the common single-line field
        -- needs no concatenation at all.
        | DL.null continuation = firstLine
        | otherwise = concat (firstLine : continuation)
  pure (name, value)
59
60
-- | Parse zero or more continuation lines.  A continuation line is a
-- newline immediately followed by a space; the text after that prefix,
-- up to the next end-of-line byte, is returned for each line.
lines :: Parser [ByteString]
lines = many continuation
  where
    continuation :: Parser ByteString
    continuation = do
      _ <- "\n "
      takeTill isEndOfLine
66
67 -------------------------------------------------------------
-- | Derive extra entries from one field of an association list.
--
-- Looks up key @k@ in @m@ (first match wins).  When it is present, the
-- entries produced by applying @f@ to its value are appended after the
-- existing ones; when it is absent, @m@ is returned unchanged.
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
        -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m =
  case lookup k m of
    Nothing -> m
    Just v  -> m <> f v
72
73
74