2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Text.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
21 module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw, parseGregorian, wrapDST) where
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn, pack)
25 import Data.Time (parseTimeOrError, defaultTimeLocale)
26 import Data.Time.Clock (UTCTime, getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
40 -- | Unused import (to parse Date Format, keeping it for maybe next steps)
41 import Control.Monad ((=<<))
42 import Data.Either (Either)
43 import Data.Fixed (Fixed (MkFixed))
44 import Data.Foldable (length)
45 import Data.String (String)
46 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
47 import Data.Time.Calendar (Day, fromGregorian)
48 import Duckling.Debug as DB
49 import Duckling.Engine (parseAndResolve)
50 import Duckling.Rules (rulesFor)
51 import Prelude (toInteger, div, otherwise, (++))
52 import Text.Parsec.Error (ParseError)
53 import Text.Parsec.Prim (Stream, ParsecT)
54 import Text.Parsec.String (Parser)
55 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
56 import Text.XML.HXT.DOM.Util (decimalStringToInt)
57 import qualified Text.ParserCombinators.Parsec (parse)
59 ------------------------------------------------------------------------
61 -- Parses dates mentions in full text given the language.
62 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
63 -- 1979-04-10 19:00:00 UTC
64 -- >>> parseDate EN (pack "April 10 1979")
65 -- 1979-04-10 00:00:00 UTC
66 parseDate :: Lang -> Text -> IO UTCTime
67 parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
69 type DateFormat = Text
72 parseDate' :: DateFormat -> DateNull -> Lang -> Text -> IO UTCTime
73 parseDate' format def lang s = do
74 dateStr' <- parseDateRaw lang s
75 let dateStr = unpack $ maybe def identity
76 $ head $ splitOn "." dateStr'
77 pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
80 -- TODO add Paris at Duckling.Locale Region datatype
81 -- | To get Homogeinity of the languages
82 -- TODO : put this in a more generic place in the source code
83 parserLang :: Lang -> DC.Lang
86 -- parserLang _ = panic "not implemented"
88 -- | Final Date parser API
89 -- IO can be avoided here:
90 -- currentContext :: Lang -> IO Context
91 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
92 -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
94 -- TODO error handling
95 parseDateRaw :: Lang -> Text -> IO (Text)
96 parseDateRaw lang text = do
97 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
98 case headMay maybeJson of
99 Just (Json.Object object) -> case HM.lookup "value" object of
100 Just (Json.String date) -> pure date
101 Just _ -> panic "ParseDateRaw ERROR: should be a json String"
102 Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
104 _ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text
107 -- | Current Time in DucklingTime format
108 -- TODO : get local Time in a more generic way
109 utcToDucklingTime :: UTCTime -> DucklingTime
110 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
112 -- | Local Context which depends on Lang and Time
113 localContext :: Lang -> DucklingTime -> Context
114 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
116 -- | Date parser with Duckling
117 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
118 parseDateWithDuckling lang input = do
119 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
120 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
121 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
123 -- | Permit to transform a String to an Int in a monadic context
124 wrapDST :: Monad m => String -> m Int
125 wrapDST = return . decimalStringToInt
127 -- | Generic parser which take at least one element not given in argument
128 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
129 many1NoneOf = (many1 . noneOf)
131 --getMultiplicator :: Int -> Int
134 | otherwise = 10 * (getMultiplicator $ div a 10)
136 -- | Parser for date format y-m-d
137 parseGregorian :: Parser Day
139 y <- wrapDST =<< many1NoneOf ['-']
141 m <- wrapDST =<< many1NoneOf ['-']
143 d <- wrapDST =<< many1NoneOf ['T']
145 return $ fromGregorian (toInteger y) m d
147 ---- | Parser for time format h:m:s
148 parseTimeOfDay :: Parser TimeOfDay
150 h <- wrapDST =<< many1NoneOf [':']
152 m <- wrapDST =<< many1NoneOf [':']
154 r <- many1NoneOf ['.']
156 dec <- many1NoneOf ['+', '-']
157 let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
158 seconds = nb * 10^(12-l)
159 return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
162 -- | Parser for timezone format +hh:mm
163 parseTimeZone :: Parser TimeZone
165 sign <- oneOf ['+', '-']
166 h <- wrapDST =<< many1NoneOf [':']
168 m <- wrapDST =<< (many1 $ anyChar)
169 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
170 in return $ TimeZone timeInMinute False "CET"
172 ---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
173 parseZonedTime :: Parser ZonedTime
176 tod <- parseTimeOfDay
178 return $ ZonedTime (LocalTime d (tod)) tz
180 ---- | Opposite of toRFC3339
181 fromRFC3339 :: Text -> Either ParseError ZonedTime
182 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
183 where input = unpack t