2 Module : Gargantext.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDate1 returns date as Text:
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE FlexibleContexts #-}
20 module Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate) where
22 import Gargantext.Prelude
23 --import Gargantext.Types.Main as G
25 import Data.Time.Clock (UTCTime, getCurrentTime)
26 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
27 import Data.Time.LocalTime (utc)
28 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
29 , DucklingTime(DucklingTime)
31 import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
32 import Duckling.Types (jsonValue, Entity)
34 import Duckling.Api (analyze, parse)
35 import qualified Data.HashSet as HashSet
36 import qualified Data.Aeson as Json
37 import Data.HashMap.Strict as HM
38 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..), getCurrentTimeZone)
39 import Data.Time.Calendar (Day, fromGregorian)
40 import Data.Fixed (Fixed (MkFixed))
42 import Data.Text (Text, unpack)
43 -- import Duckling.Engine (parseAndResolve)
44 -- import Duckling.Rules (rulesFor)
45 -- import Duckling.Debug as DB
47 import Duckling.Types (ResolvedToken)
49 import System.IO.Unsafe (unsafePerformIO)
51 import Text.Parsec.Error (ParseError)
52 import Text.Parsec.String (Parser)
53 import Text.Parsec.Prim (Stream, ParsecT)
54 import qualified Text.ParserCombinators.Parsec (parse)
55 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
57 import Text.XML.HXT.DOM.Util (decimalStringToInt)
58 -- TODO add Paris at Duckling.Locale Region datatype
59 -- | To get Homogeinity of the languages
60 -- TODO : put this in a more generic place in the source code
61 --parserLang :: G.Language -> Lang
62 --parserLang G.FR = FR
63 --parserLang G.EN = EN
66 -- | Final Date parser API
67 -- IO can be avoided here:
68 -- currentContext :: Lang -> IO Context
69 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
70 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
71 parseDate1 :: Lang -> Text -> IO Text
72 parseDate1 lang text = do
73 maybeJson <- pm jsonValue <$> parseDateWithDuckling lang text
74 case headMay maybeJson of
75 Just (Json.Object object) -> case HM.lookup "value" object of
76 Just (Json.String date) -> pure date
77 Just _ -> error "ParseDate ERROR: should be a json String"
78 Nothing -> error "ParseDate ERROR: no date found"
79 _ -> error "ParseDate ERROR: type error"
83 -- | Current Time in DucklingTime format
84 -- TODO : get local Time in a more generic way
85 utcToDucklingTime :: UTCTime -> DucklingTime
86 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
88 -- | Local Context which depends on Lang and Time
89 localContext :: Lang -> DucklingTime -> Context
90 localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
92 -- | Date parser with Duckling
93 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
94 parseDateWithDuckling lang input = do
95 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
96 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
97 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
100 parseDate :: Lang -> Text -> IO [Entity]
101 parseDate lang input = do
102 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
103 pure $ parse input context [(This Time)]
105 wrapDST :: Monad m => String -> m Int
106 wrapDST = (return . decimalStringToInt)
108 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
109 many1NoneOf = (many1 . noneOf)
111 parseGregorian :: Parser Day
113 y <- wrapDST =<< many1NoneOf ['-']
115 m <- wrapDST =<< many1NoneOf ['-']
117 d <- wrapDST =<< many1NoneOf ['T']
119 return $ fromGregorian (toInteger y) m d
121 parseTimeOfDay :: Parser TimeOfDay
123 h <- wrapDST =<< many1NoneOf [':']
125 m <- wrapDST =<< many1NoneOf [':']
127 s <- wrapDST =<< many1NoneOf ['+', '-']
128 return $ TimeOfDay h m (MkFixed $ toInteger s)
130 parseTimeZone :: Parser TimeZone
132 sign <- oneOf ['+', '-']
133 h <- wrapDST =<< many1NoneOf [':']
135 m <- wrapDST =<< (many1 $ anyChar)
136 let (TimeZone _ s n) = unsafePerformIO getCurrentTimeZone
137 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
138 in return $ TimeZone timeInMinute s n
140 parseZonedTime :: Parser ZonedTime
143 tod <- parseTimeOfDay
145 return $ ZonedTime (LocalTime d (tod)) tz
147 fromRFC3339 :: Text -> Either ParseError ZonedTime
148 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
149 where input = unpack t