2 Module : Gargantext.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDate1 returns date as Text:
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE FlexibleContexts #-}
19 {-# LANGUAGE NoImplicitPrelude #-}
21 module Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate, fromRFC3339) where
23 import Gargantext.Prelude
24 import Prelude (toInteger)
25 --import Gargantext.Types.Main as G
27 import Data.Time.Clock (UTCTime, getCurrentTime)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Data.Time.LocalTime (utc)
30 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
31 , DucklingTime(DucklingTime)
33 import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
34 import Duckling.Types (jsonValue, Entity)
36 import Duckling.Api (analyze, parse)
37 import qualified Data.HashSet as HashSet
38 import qualified Data.Aeson as Json
39 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..), getCurrentTimeZone)
40 import Data.Time.Calendar (Day, fromGregorian)
41 import Data.Fixed (Fixed (MkFixed))
42 import Data.HashMap.Strict as HM hiding (map)
44 import Control.Monad ((=<<))
45 import Data.Either (Either)
46 import Data.String (String)
47 import Data.Text (Text, unpack)
48 -- import Duckling.Engine (parseAndResolve)
49 -- import Duckling.Rules (rulesFor)
50 -- import Duckling.Debug as DB
52 import Duckling.Types (ResolvedToken)
54 import System.IO.Unsafe (unsafePerformIO)
56 import Text.Parsec.Error (ParseError)
57 import Text.Parsec.String (Parser)
58 import Text.Parsec.Prim (Stream, ParsecT)
59 import qualified Text.ParserCombinators.Parsec (parse)
60 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
62 import Text.XML.HXT.DOM.Util (decimalStringToInt)
63 -- TODO add Paris at Duckling.Locale Region datatype
64 -- | To get Homogeinity of the languages
65 -- TODO : put this in a more generic place in the source code
66 --parserLang :: G.Language -> Lang
67 --parserLang G.FR = FR
68 --parserLang G.EN = EN
71 -- | Final Date parser API
72 -- IO can be avoided here:
73 -- currentContext :: Lang -> IO Context
74 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
75 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
76 parseDate1 :: Lang -> Text -> IO Text
77 parseDate1 lang text = do
78 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
79 case headMay maybeJson of
80 Just (Json.Object object) -> case HM.lookup "value" object of
81 Just (Json.String date) -> pure date
82 Just _ -> panic "ParseDate ERROR: should be a json String"
83 Nothing -> panic "ParseDate ERROR: no date found"
84 _ -> panic "ParseDate ERROR: type error"
88 -- | Current Time in DucklingTime format
89 -- TODO : get local Time in a more generic way
90 utcToDucklingTime :: UTCTime -> DucklingTime
91 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
93 -- | Local Context which depends on Lang and Time
94 localContext :: Lang -> DucklingTime -> Context
95 localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
97 -- | Date parser with Duckling
98 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
99 parseDateWithDuckling lang input = do
100 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
101 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
102 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
105 parseDate :: Lang -> Text -> IO [Entity]
106 parseDate lang input = do
107 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
108 pure $ parse input context [(This Time)]
110 -- | Permit to transform a String to an Int in a monadic context
111 wrapDST :: Monad m => String -> m Int
112 wrapDST = (return . decimalStringToInt)
114 -- | Generic parser which take at least one element not given in argument
115 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
116 many1NoneOf = (many1 . noneOf)
118 -- | Parser for date format y-m-d
119 parseGregorian :: Parser Day
121 y <- wrapDST =<< many1NoneOf ['-']
123 m <- wrapDST =<< many1NoneOf ['-']
125 d <- wrapDST =<< many1NoneOf ['T']
127 return $ fromGregorian (toInteger y) m d
129 -- | Parser for time format h:m:s
130 parseTimeOfDay :: Parser TimeOfDay
132 h <- wrapDST =<< many1NoneOf [':']
134 m <- wrapDST =<< many1NoneOf [':']
136 s <- wrapDST =<< many1NoneOf ['+', '-']
137 return $ TimeOfDay h m (MkFixed $ toInteger s)
139 -- | Parser for timezone format +hh:mm
140 parseTimeZone :: Parser TimeZone
142 sign <- oneOf ['+', '-']
143 h <- wrapDST =<< many1NoneOf [':']
145 m <- wrapDST =<< (many1 $ anyChar)
146 let (TimeZone _ s n) = unsafePerformIO getCurrentTimeZone
147 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
148 in return $ TimeZone timeInMinute s n
150 -- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
151 parseZonedTime :: Parser ZonedTime
154 tod <- parseTimeOfDay
156 return $ ZonedTime (LocalTime d (tod)) tz
158 -- | Opposite of toRFC3339
159 fromRFC3339 :: Text -> Either ParseError ZonedTime
160 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
161 where input = unpack t