]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/Parsers/Date/Parsec.hs
Merge branch 'dev' into dev-doc-table-optimization
[gargantext.git] / src / Gargantext / Text / Corpus / Parsers / Date / Parsec.hs
1 {-|
2 Module : Gargantext.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Text.Corpus.Parsers.Date.Parsec
14 where
15
16 import Control.Monad ((=<<))
17 import Data.Either (Either)
18 import Data.Fixed (Fixed (MkFixed))
19 import Data.Foldable (length)
20 import Data.String (String)
21 import Data.Text (Text, unpack)
22 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
23 import Data.Time.Calendar (Day, fromGregorian)
24 import Gargantext.Prelude
25 import Prelude (toInteger, div, otherwise, (++))
26 import Text.Parsec.Error (ParseError)
27 import Text.Parsec.Prim (Stream, ParsecT)
28 import Text.Parsec.String (Parser)
29 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
30 import Text.XML.HXT.DOM.Util (decimalStringToInt)
31 import qualified Text.ParserCombinators.Parsec (parse)
32
33 -- | Permit to transform a String to an Int in a monadic context
34 wrapDST :: Monad m => String -> m Int
35 wrapDST = return . decimalStringToInt
36
37 -- | Generic parser which take at least one element not given in argument
38 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
39 many1NoneOf = (many1 . noneOf)
40
41 getMultiplicator :: Int -> Int
42 getMultiplicator a
43 | 0 >= a = 1
44 | otherwise = 10 * (getMultiplicator $ div a 10)
45
46 -- | Parser for date format y-m-d
47 parseGregorian :: Parser Day
48 parseGregorian = do
49 y <- wrapDST =<< many1NoneOf ['-']
50 _ <- char '-'
51 m <- wrapDST =<< many1NoneOf ['-']
52 _ <- char '-'
53 d <- wrapDST =<< many1NoneOf ['T']
54 _ <- char 'T'
55 return $ fromGregorian (toInteger y) m d
56
57 ---- | Parser for time format h:m:s
58 parseTimeOfDay :: Parser TimeOfDay
59 parseTimeOfDay = do
60 h <- wrapDST =<< many1NoneOf [':']
61 _ <- char ':'
62 m <- wrapDST =<< many1NoneOf [':']
63 _ <- char ':'
64 r <- many1NoneOf ['.']
65 _ <- char '.'
66 dec <- many1NoneOf ['+', '-']
67 let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
68 seconds = nb * 10^(12-l)
69 return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
70
71
72 -- | Parser for timezone format +hh:mm
73 parseTimeZone :: Parser TimeZone
74 parseTimeZone = do
75 sign <- oneOf ['+', '-']
76 h <- wrapDST =<< many1NoneOf [':']
77 _ <- char ':'
78 m <- wrapDST =<< (many1 $ anyChar)
79 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
80 in return $ TimeZone timeInMinute False "CET"
81
82 ---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
83 parseZonedTime :: Parser ZonedTime
84 parseZonedTime= do
85 d <- parseGregorian
86 tod <- parseTimeOfDay
87 tz <- parseTimeZone
88 return $ ZonedTime (LocalTime d (tod)) tz
89
90 ---- | Opposite of toRFC3339
91 fromRFC3339 :: Text -> Either ParseError ZonedTime
92 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
93 where input = unpack t