2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Text.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
21 module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn, pack)
25 import Data.Time (parseTimeOrError, defaultTimeLocale)
26 import Data.Time.Clock (UTCTime, getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
40 -- | Unused import (to parse Date Format, keeping it for maybe next steps)
41 -- import Control.Monad ((=<<))
42 -- import Data.Either (Either)
43 -- import Data.Fixed (Fixed (MkFixed))
44 -- import Data.Foldable (length)
45 -- import Data.String (String)
46 -- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
47 -- import Data.Time.Calendar (Day, fromGregorian)
48 -- import Duckling.Debug as DB
49 -- import Duckling.Engine (parseAndResolve)
50 -- import Duckling.Rules (rulesFor)
51 -- import Prelude (toInteger, div, otherwise, (++))
52 -- import Text.Parsec.Error (ParseError)
53 -- import Text.Parsec.Prim (Stream, ParsecT)
54 -- import Text.Parsec.String (Parser)
55 -- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
56 -- import Text.XML.HXT.DOM.Util (decimalStringToInt)
57 -- import qualified Text.ParserCombinators.Parsec (parse)
59 ------------------------------------------------------------------------
61 -- Parses dates mentions in full text given the language.
62 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
63 -- 1979-04-10 19:00:00 UTC
64 -- >>> parseDate EN (pack "April 10 1979")
65 -- 1979-04-10 00:00:00 UTC
66 parseDate :: Lang -> Text -> IO UTCTime
68 dateStr' <- parseDateRaw lang s
69 let format = "%Y-%m-%dT%T"
70 let dateStr = unpack $ maybe "0-0-0T0:0:0" identity
71 $ head $ splitOn "." dateStr'
72 pure $ parseTimeOrError True defaultTimeLocale format dateStr
76 -- TODO add Paris at Duckling.Locale Region datatype
77 -- | To get Homogeinity of the languages
78 -- TODO : put this in a more generic place in the source code
79 parserLang :: Lang -> DC.Lang
82 -- parserLang _ = panic "not implemented"
84 -- | Final Date parser API
85 -- IO can be avoided here:
86 -- currentContext :: Lang -> IO Context
87 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
88 -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
90 -- TODO error handling
91 parseDateRaw :: Lang -> Text -> IO (Text)
92 parseDateRaw lang text = do
93 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
94 case headMay maybeJson of
95 Just (Json.Object object) -> case HM.lookup "value" object of
96 Just (Json.String date) -> pure date
97 Just _ -> panic "ParseDateRaw ERROR: should be a json String"
98 Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
100 _ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text
103 -- | Current Time in DucklingTime format
104 -- TODO : get local Time in a more generic way
105 utcToDucklingTime :: UTCTime -> DucklingTime
106 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
108 -- | Local Context which depends on Lang and Time
109 localContext :: Lang -> DucklingTime -> Context
110 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
112 -- | Date parser with Duckling
113 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
114 parseDateWithDuckling lang input = do
115 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
116 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
117 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
119 -- | Permit to transform a String to an Int in a monadic context
120 --wrapDST :: Monad m => String -> m Int
121 --wrapDST = (return . decimalStringToInt)
123 -- | Generic parser which take at least one element not given in argument
124 --many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
125 --many1NoneOf = (many1 . noneOf)
127 --getMultiplicator :: Int -> Int
130 -- | otherwise = 10 * (getMultiplicator $ div a 10)
132 -- | Parser for date format y-m-d
133 --parseGregorian :: Parser Day
134 --parseGregorian = do
135 -- y <- wrapDST =<< many1NoneOf ['-']
137 -- m <- wrapDST =<< many1NoneOf ['-']
139 -- d <- wrapDST =<< many1NoneOf ['T']
141 -- return $ fromGregorian (toInteger y) m d
143 ---- | Parser for time format h:m:s
144 --parseTimeOfDay :: Parser TimeOfDay
145 --parseTimeOfDay = do
146 -- h <- wrapDST =<< many1NoneOf [':']
148 -- m <- wrapDST =<< many1NoneOf [':']
150 -- r <- many1NoneOf ['.']
152 -- dec <- many1NoneOf ['+', '-']
153 -- let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
154 -- seconds = nb * 10^(12-l)
155 -- return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
158 -- | Parser for timezone format +hh:mm
159 --parseTimeZone :: Parser TimeZone
161 -- sign <- oneOf ['+', '-']
162 -- h <- wrapDST =<< many1NoneOf [':']
164 -- m <- wrapDST =<< (many1 $ anyChar)
165 -- let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
166 -- in return $ TimeZone timeInMinute False "CET"
168 ---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
169 --parseZonedTime :: Parser ZonedTime
171 -- d <- parseGregorian
172 -- tod <- parseTimeOfDay
173 -- tz <- parseTimeZone
174 -- return $ ZonedTime (LocalTime d (tod)) tz
176 ---- | Opposite of toRFC3339
177 --fromRFC3339 :: Text -> Either ParseError ZonedTime
178 --fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
179 -- where input = unpack t