]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/RIS/Presse.hs
[PARSERS] RIS PRESSE fix bug of \r
[gargantext.git] / src / Gargantext / Text / Parsers / RIS / Presse.hs
1 {-|
2 Module : Gargantext.Text.Parsers.RIS.Presse
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Presse RIS format parser en enricher.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
18
19 import Data.Either (either)
20 import Data.List (lookup)
21 import Data.Tuple.Extra (first)
22 import Control.Applicative
23 import Data.Attoparsec.ByteString (Parser, try, string, takeTill, take, manyTill, many1, endOfInput, parseOnly)
24 import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
25 import Data.ByteString (ByteString, concat, length)
26 import Data.ByteString.Char8 (pack)
27 import Data.Monoid ((<>))
28 import Gargantext.Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
29 import Gargantext.Text.Parsers.RIS (withField)
30 import qualified Data.List as DL
31 -------------------------------------------------------------
32
33 -------------------------------------------------------------
34 presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
35 presseEnrich = (withField "DA" presseDate)
36 . (withField "LA" presseLang)
37 . (map (first presseFields))
38
39 presseDate :: ByteString -> [(ByteString, ByteString)]
40 presseDate str = either (const []) identity $ parseOnly parseDate str
41
42 parseDate :: Parser [(ByteString, ByteString)]
43 parseDate = do
44 day <- take 2 <* "/"
45 mon <- take 2 <* "/"
46 yea <- take 4
47 pure $ map (first (\x -> "publication_" <> x))
48 [ ("day",day)
49 , ("month", mon)
50 , ("year", yea)
51 , ("date", yea <> "-" <> mon <> "-" <> day <> "T0:0:0")
52 ]
53
54 presseLang :: ByteString -> [(ByteString, ByteString)]
55 presseLang "Français" = [("language", "FR")]
56 presseLang "English" = [("language", "EN")]
57 presseLang x = [("language", x)]
58
59
60 presseFields :: ByteString -> ByteString
61 presseFields champs
62 | champs == "AU" = "authors"
63 | champs == "TI" = "title"
64 | champs == "JF" = "source"
65 | champs == "DI" = "doi"
66 | champs == "UR" = "url"
67 | champs == "N2" = "abstract"
68 | otherwise = champs
69
70
71 {-
72 fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
73 fixTitle ns = ns <> [ti, ab]
74 where
75 ti = case
76 -}