2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDate1 returns date as Text:
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
21 module Gargantext.Text.Parsers.Date (parseDate1, parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
23 import Gargantext.Core (Lang(FR,EN))
24 import Gargantext.Prelude
25 import Prelude (toInteger, div, otherwise, (++))
26 --import Gargantext.Types.Main as G
28 import Data.Time.Clock (UTCTime, getCurrentTime)
29 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
30 import Data.Time.LocalTime (utc)
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
32 , DucklingTime(DucklingTime)
34 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
35 import qualified Duckling.Core as DC
36 import Duckling.Types (jsonValue, Entity)
38 import Duckling.Api (analyze, parse)
39 import qualified Data.HashSet as HashSet
40 import qualified Data.Aeson as Json
41 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
42 import Data.Time.Calendar (Day, fromGregorian)
43 import Data.Fixed (Fixed (MkFixed))
44 import Data.Foldable (length)
45 import Data.HashMap.Strict as HM hiding (map)
47 import Control.Monad ((=<<))
48 import Data.Either (Either)
49 import Data.String (String)
50 import Data.Text (Text, unpack)
51 -- import Duckling.Engine (parseAndResolve)
52 -- import Duckling.Rules (rulesFor)
53 -- import Duckling.Debug as DB
55 import Duckling.Types (ResolvedToken)
58 import Text.Parsec.Error (ParseError)
59 import Text.Parsec.String (Parser)
60 import Text.Parsec.Prim (Stream, ParsecT)
61 import qualified Text.ParserCombinators.Parsec (parse)
62 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
64 import Text.XML.HXT.DOM.Util (decimalStringToInt)
65 -- TODO add Paris at Duckling.Locale Region datatype
66 -- | To get Homogeinity of the languages
67 -- TODO : put this in a more generic place in the source code
68 parserLang :: Lang -> DC.Lang
71 -- parserLang _ = panic "not implemented"
73 -- | Final Date parser API
74 -- IO can be avoided here:
75 -- currentContext :: Lang -> IO Context
76 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
77 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
79 -- TODO error handling
80 parseDate1 :: Lang -> Text -> IO Text
81 parseDate1 lang text = do
82 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
83 case headMay maybeJson of
84 Just (Json.Object object) -> case HM.lookup "value" object of
85 Just (Json.String date) -> pure date
86 Just _ -> panic "ParseDate ERROR: should be a json String"
87 Nothing -> panic "ParseDate ERROR: no date found"
88 _ -> panic "ParseDate ERROR: type error"
92 -- | Current Time in DucklingTime format
93 -- TODO : get local Time in a more generic way
94 utcToDucklingTime :: UTCTime -> DucklingTime
95 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
97 -- | Local Context which depends on Lang and Time
98 localContext :: Lang -> DucklingTime -> Context
99 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
101 -- | Date parser with Duckling
102 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
103 parseDateWithDuckling lang input = do
104 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
105 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
106 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
109 parseDate :: Lang -> Text -> IO [Entity]
110 parseDate lang input = do
111 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
112 pure $ parse input context [(This Time)]
114 -- | Permit to transform a String to an Int in a monadic context
115 wrapDST :: Monad m => String -> m Int
116 wrapDST = (return . decimalStringToInt)
118 -- | Generic parser which take at least one element not given in argument
119 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
120 many1NoneOf = (many1 . noneOf)
122 getMultiplicator :: Int -> Int
125 | otherwise = 10 * (getMultiplicator $ div a 10)
127 -- | Parser for date format y-m-d
128 parseGregorian :: Parser Day
130 y <- wrapDST =<< many1NoneOf ['-']
132 m <- wrapDST =<< many1NoneOf ['-']
134 d <- wrapDST =<< many1NoneOf ['T']
136 return $ fromGregorian (toInteger y) m d
138 -- | Parser for time format h:m:s
139 parseTimeOfDay :: Parser TimeOfDay
141 h <- wrapDST =<< many1NoneOf [':']
143 m <- wrapDST =<< many1NoneOf [':']
145 r <- many1NoneOf ['.']
147 dec <- many1NoneOf ['+', '-']
148 let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
149 seconds = nb * 10^(12-l)
150 return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
153 -- | Parser for timezone format +hh:mm
154 parseTimeZone :: Parser TimeZone
156 sign <- oneOf ['+', '-']
157 h <- wrapDST =<< many1NoneOf [':']
159 m <- wrapDST =<< (many1 $ anyChar)
160 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
161 in return $ TimeZone timeInMinute False "CET"
163 -- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
164 parseZonedTime :: Parser ZonedTime
167 tod <- parseTimeOfDay
169 return $ ZonedTime (LocalTime d (tod)) tz
171 -- | Opposite of toRFC3339
172 fromRFC3339 :: Text -> Either ParseError ZonedTime
173 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
174 where input = unpack t