]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/Parsers/RIS/Presse.hs
[REFACT] uniforming hyperdata fields name
[gargantext.git] / src / Gargantext / Text / Corpus / Parsers / RIS / Presse.hs
1 {-|
2 Module : Gargantext.Text.Corpus.Parsers.RIS.Presse
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Presse RIS format parser for Europresse Database.
11
12 -}
13
14
15 module Gargantext.Text.Corpus.Parsers.RIS.Presse (presseEnrich) where
16
17 import Data.List (lookup)
18 import Data.Either (either)
19 import Data.Tuple.Extra (first, both, uncurry)
20 import Data.Attoparsec.ByteString (parseOnly)
21 import Data.ByteString (ByteString, length)
22 import Gargantext.Prelude hiding (takeWhile, take, length)
23 import Gargantext.Text.Corpus.Parsers.RIS (onField)
24 import Gargantext.Core (Lang(..))
25 import qualified Gargantext.Text.Corpus.Parsers.Date.Attoparsec as Date
26
27
28
29 presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
30 presseEnrich = (onField "DA" parseDate)
31 . (onField "LA" parseLang)
32 . fixFields
33
34
35 parseDate :: ByteString -> [(ByteString, ByteString)]
36 parseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
37
38 parseLang :: ByteString -> [(ByteString, ByteString)]
39 parseLang "Français" = [(langField, cs $ show FR)]
40 parseLang "English" = [(langField, cs $ show EN)]
41 parseLang x = [(langField, x)]
42
43 langField :: ByteString
44 langField = "language"
45
46
47 fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
48 fixFields ns = map (first fixFields'') ns
49 where
50 -- | Title is sometimes longer than abstract
51 fixFields'' = case uncurry (>) <$> look'' of
52 Just True -> fixFields' "abstract" "title"
53 _ -> fixFields' "title" "abstract"
54
55 look'' :: Maybe (Int, Int)
56 look'' = both length <$> look
57
58 look :: Maybe (ByteString,ByteString)
59 look = (,) <$> lookup "TI" ns <*> lookup "N2" ns
60
61
62 fixFields' :: ByteString -> ByteString
63 -> ByteString -> ByteString
64 fixFields' title abstract champs
65 | champs == "AU" = "authors"
66 | champs == "TI" = title
67 | champs == "JF" = "source"
68 | champs == "DI" = "doi"
69 | champs == "UR" = "url"
70 | champs == "N2" = abstract
71 | otherwise = champs
72
73