2 Module : Gargantext.Text.Parsers.RIS.Presse
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Presse RIS format parser en enricher.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
19 import Data.Either (either)
20 import Data.List (lookup)
21 import Data.Tuple.Extra (first)
22 import Control.Applicative
23 import Data.Attoparsec.ByteString (Parser, try, string, takeTill, take, manyTill, many1, endOfInput, parseOnly)
24 import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
25 import Data.ByteString (ByteString, concat, length)
26 import Data.ByteString.Char8 (pack)
27 import Data.Monoid ((<>))
28 import Gargantext.Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
29 import Gargantext.Text.Parsers.RIS (withField)
30 import qualified Data.List as DL
31 -------------------------------------------------------------
33 -------------------------------------------------------------
34 presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
35 presseEnrich = (withField "DA" presseDate)
36 . (withField "LA" presseLang)
37 . (map (first presseFields))
39 presseDate :: ByteString -> [(ByteString, ByteString)]
40 presseDate str = either (const []) identity $ parseOnly parseDate str
42 parseDate :: Parser [(ByteString, ByteString)]
47 pure $ map (first (\x -> "publication_" <> x))
51 , ("date", yea <> "-" <> mon <> "-" <> day <> "T0:0:0")
54 presseLang :: ByteString -> [(ByteString, ByteString)]
55 presseLang "Français" = [("language", "FR")]
56 presseLang "English" = [("language", "EN")]
57 presseLang _ = undefined
60 presseFields :: ByteString -> ByteString
62 | champs == "AU" = "authors"
63 | champs == "TI" = "title"
64 | champs == "JF" = "source"
65 | champs == "DI" = "doi"
66 | champs == "UR" = "url"
67 | champs == "N2" = "abstract"
72 fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
73 fixTitle ns = ns <> [ti, ab]