2 Module : Gargantext.Core.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE TypeFamilies #-}
19 module Gargantext.Core.Text.Corpus.Parsers.Date
20 {-(parse, parseRaw, dateSplit, Year, Month, Day)-}
23 import System.Environment (getEnv)
24 import Data.Aeson (toJSON, Value)
25 import Data.Either (Either(..))
26 import Data.HashMap.Strict as HM hiding (map)
27 import Data.Text (Text, unpack, splitOn, replace)
28 import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
29 import qualified Data.Time.Calendar as DTC
30 import Data.Time.Clock (UTCTime(..), getCurrentTime)
31 import Data.Time.Clock ( secondsToDiffTime)
32 import Data.Time.LocalTime (utc)
33 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
34 import Duckling.Api (analyze)
35 import Duckling.Core (makeLocale, Dimension(Time))
36 import Duckling.Types (Seal(..))
37 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
38 import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
39 import Gargantext.Core (Lang(FR,EN))
40 import Gargantext.Core.Types (DebugMode(..), withDebugMode)
41 import Gargantext.Prelude
42 --import qualified Control.Exception as CE
43 import qualified Data.Aeson as Json
44 import qualified Data.HashSet as HashSet
45 import qualified Duckling.Core as DC
47 ------------------------------------------------------------------------
48 -- | Parse date to Ints
49 -- TODO add hours, minutes and seconds
50 dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
51 dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
52 dateSplit l (Just txt) = do
53 utcTime <- parse l txt
54 let (y, m, d) = split' utcTime
55 pure (Just utcTime, (Just y, Just m,Just d))
57 split' :: UTCTime -> (Year, Month, Day)
58 split' utcTime = (fromIntegral y, m, d)
60 (UTCTime day _) = utcTime
61 (y,m,d) = toGregorian day
66 ------------------------------------------------------------------------
69 -- Parses dates mentions in full text given the language.
70 -- >>> parse FR (pack "10 avril 1900 à 19H")
71 -- 1900-04-10 19:00:00 UTC
72 -- >>> parse EN (pack "April 10 1900")
73 -- 1900-04-10 00:00:00 UTC
74 parse :: Lang -> Text -> IO UTCTime
76 dateStr' <- parseRawSafe lang s
77 case dateFlow dateStr' of
78 DateFlowSuccess ok -> pure ok
79 _ -> withDebugMode (DebugMode True)
80 "[G.C.T.P.T.Date parse]" (lang,s)
85 defaultDate = "0-0-0T0:0:0"
87 type DateFormat = Text
88 type DateDefault = Text
91 data DateFlow = DucklingSuccess { ds_result :: Text }
92 | DucklingFailure { df_result :: Text }
93 | ReadFailure1 { rf1_result :: Text }
94 | ReadFailure2 { rf2_result :: Text }
95 | DateFlowSuccess { success :: UTCTime }
100 dateFlow :: DateFlow -> DateFlow
101 dateFlow (DucklingSuccess res) = case (head $ splitOn "." res) of
102 Nothing -> dateFlow (ReadFailure1 res)
103 Just re -> case readDate res of
104 Nothing -> dateFlow (ReadFailure1 re)
105 Just ok -> DateFlowSuccess ok
106 dateFlow (DucklingFailure txt) = case readDate $ replace " " "T" txt of
107 Nothing -> dateFlow (ReadFailure1 txt)
108 Just ok -> DateFlowSuccess ok
109 dateFlow (ReadFailure1 txt) = case readDate txt of
110 Nothing -> dateFlow $ ReadFailure2 txt
111 Just ok -> DateFlowSuccess ok
112 dateFlow (ReadFailure2 txt) = case readDate $ replace " " "" txt <> "-01-01T00:00:00" of
113 Nothing -> DateFlowFailure
114 Just ok -> DateFlowSuccess ok
115 dateFlow _ = DateFlowFailure
118 readDate :: Text -> Maybe UTCTime
120 let format = cs $ iso8601DateFormat (Just "%H:%M:%S")
121 parseTimeM True defaultTimeLocale (unpack format) (cs txt)
124 -- TODO add Paris at Duckling.Locale Region datatype
125 -- | To get Homogeinity of the languages
126 -- TODO : put this in a more generic place in the source code
127 parserLang :: Lang -> DC.Lang
128 parserLang FR = DC.FR
129 parserLang EN = DC.EN
130 parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show lang)
132 -- | Final Date parser API
133 -- IO can be avoided here:
134 -- currentContext :: Lang -> IO Context
135 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
136 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
139 parseRawSafe :: Lang -> Text -> IO DateFlow
140 parseRawSafe lang text = do
141 let triedParseRaw = parseRaw lang text
142 dateStr' <- case triedParseRaw of
143 --Left (CE.SomeException err) -> do
145 envLang <- getEnv "LANG"
146 printDebug "[G.C.T.C.P.Date] Exception: " (err, envLang, lang, text)
147 pure $ DucklingFailure text
148 Right res -> pure $ DucklingSuccess res
151 --tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
152 --tryParseRaw lang text = CE.try (parseRaw lang text)
154 parseRaw :: Lang -> Text -> Either Text Text
155 parseRaw lang text = do -- case result
156 let maybeResult = extractValue $ getTimeValue
157 $ parseDateWithDuckling lang text (Options True)
159 Just result -> Right result
161 -- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
162 Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> (cs . show) lang <> " :: " <> text
164 getTimeValue :: [ResolvedToken] -> Maybe Value
165 getTimeValue rt = case head rt of
168 Just x -> case rval x of
169 RVal Time t -> Just $ toJSON t
173 extractValue :: Maybe Value -> Maybe Text
174 extractValue (Just (Json.Object object)) =
175 case HM.lookup "value" object of
176 Just (Json.String date) -> Just date
178 extractValue _ = Nothing
180 -- | Current Time in DucklingTime format
181 -- TODO : get local Time in a more generic way
182 utcToDucklingTime :: UTCTime -> DucklingTime
183 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
185 -- | Local Context which depends on Lang and Time
186 localContext :: Lang -> DucklingTime -> Context
187 localContext lang dt = Context { referenceTime = dt
188 , locale = makeLocale (parserLang lang) Nothing }
190 defaultDay :: DTC.Day
191 defaultDay = DTC.fromGregorian 1 1 1
193 defaultUTCTime :: UTCTime
194 defaultUTCTime = UTCTime { utctDay = defaultDay
195 , utctDayTime = secondsToDiffTime 0 }
197 -- | Date parser with Duckling
198 parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
199 parseDateWithDuckling lang input options = do
200 let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
201 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
202 -- TODO check/test Options False or True
203 analyze input contxt options $ HashSet.fromList [(Seal Time)]