2 Module : Gargantext.Core.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE TypeFamilies #-}
19 module Gargantext.Core.Text.Corpus.Parsers.Date
20 {-(parse, parseRaw, dateSplit, Year, Month, Day)-}
23 --import qualified Control.Exception as CE
24 import Data.Aeson (toJSON, Value)
25 import Data.Either (Either(..))
26 import Data.HashMap.Strict as HM hiding (map)
27 import Data.Maybe (fromMaybe)
28 import Data.Text (Text, unpack, splitOn, replace)
29 import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
30 import Data.Time.Clock ( secondsToDiffTime)
31 import Data.Time.Clock (UTCTime(..), getCurrentTime)
32 import Data.Time.LocalTime (utc)
33 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
34 import Duckling.Api (analyze)
35 import Duckling.Core (makeLocale, Dimension(Time))
36 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
37 import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
38 import Duckling.Types (Seal(..))
39 import Gargantext.Core (Lang(FR,EN))
40 import Gargantext.Core.Types (DebugMode(..), withDebugMode)
41 import Gargantext.Prelude
42 import System.Environment (getEnv)
43 import qualified Data.Aeson as Json
44 import qualified Data.HashSet as HashSet
45 import qualified Data.Time.Calendar as DTC
46 import qualified Duckling.Core as DC
47 import qualified Data.List as List
48 ------------------------------------------------------------------------
49 -- | Parse date to Ints
50 -- TODO add hours, minutes and seconds
51 dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
52 dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
53 dateSplit l (Just txt) = do
54 utcTime <- parse l txt
55 let (y, m, d) = split' utcTime
56 pure (Just utcTime, (Just y, Just m,Just d))
58 split' :: UTCTime -> (Year, Month, Day)
59 split' utcTime = (fromIntegral y, m, d)
61 (UTCTime day _) = utcTime
62 (y,m,d) = toGregorian day
67 ------------------------------------------------------------------------
70 -- Parses dates mentions in full text given the language.
71 -- >>> parse FR (pack "1 avril 1900 à 19H")
72 -- 1900-04-01 19:00:00 UTC
73 -- >>> parse EN (pack "April 1 1900")
74 -- 1900-04-01 00:00:00 UTC
75 parse :: Lang -> Text -> IO UTCTime
77 -- printDebug "Date: " s
78 let result = dateFlow (DucklingFailure s)
79 --printDebug "Date': " dateStr'
81 DateFlowSuccess ok -> pure ok
82 DateFlowFailure -> (withDebugMode (DebugMode True)
83 "[G.C.T.P.T.Date parse]" (lang,s)
85 _ -> panic "[G.C.T.C.Parsers.Date] parse: Should not happen"
88 defaultDate = "0-0-0T0:0:0"
90 type DateFormat = Text
91 type DateDefault = Text
94 data DateFlow = DucklingSuccess { ds_result :: Text }
95 | DucklingFailure { df_result :: Text }
96 | ReadFailure1 { rf1_result :: Text }
97 | ReadFailure2 { rf2_result :: Text }
98 | DateFlowSuccess { success :: UTCTime }
103 dateFlow :: DateFlow -> DateFlow
104 dateFlow (DucklingSuccess res) = case (head $ splitOn "." res) of
105 Nothing -> dateFlow (ReadFailure1 res)
106 Just re -> case readDate res of
107 Nothing -> dateFlow (ReadFailure1 re)
108 Just ok -> DateFlowSuccess ok
109 --dateFlow (DucklingFailure txt) = case readDate $ replace " " "T" txt of
110 dateFlow (DucklingFailure txt) = case readDate (fromMaybe "" $ headMay $ List.filter (/= "") $ splitOn " " txt) of
111 Nothing -> dateFlow (ReadFailure1 txt)
112 Just ok -> DateFlowSuccess ok
113 dateFlow (ReadFailure1 txt) = case readDate txt of
114 Nothing -> dateFlow $ ReadFailure2 txt
115 Just ok -> DateFlowSuccess ok
116 dateFlow (ReadFailure2 txt) = case readDate $ replace " " "" txt <> "-01-01" of
117 Nothing -> DateFlowFailure
118 Just ok -> DateFlowSuccess ok
119 dateFlow _ = DateFlowFailure
122 readDate :: Text -> Maybe UTCTime
124 --let format = cs $ iso8601DateFormat (Just "%F %H:%M:%S")
125 let format = cs $ iso8601DateFormat Nothing
126 parseTimeM True defaultTimeLocale (unpack format) (cs txt)
129 -- TODO add Paris at Duckling.Locale Region datatype
130 -- | To get Homogeinity of the languages
131 -- TODO : put this in a more generic place in the source code
132 parserLang :: Lang -> DC.Lang
133 parserLang FR = DC.FR
134 parserLang EN = DC.EN
135 parserLang lang = panic $ "[G.C.T.C.P.Date] Lang not implemented" <> (cs $ show lang)
137 -- | Final Date parser API
138 -- IO can be avoided here:
139 -- currentContext :: Lang -> IO Context
140 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
141 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
144 parseRawSafe :: Lang -> Text -> IO DateFlow
145 parseRawSafe lang text = do
146 let triedParseRaw = parseRaw lang text
147 dateStr' <- case triedParseRaw of
148 --Left (CE.SomeException err) -> do
150 _envLang <- getEnv "LANG"
151 -- printDebug "[G.C.T.C.P.Date] Exception: " (err, envLang, lang, text)
152 pure $ DucklingFailure text
153 Right res -> pure $ DucklingSuccess res
156 --tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
157 --tryParseRaw lang text = CE.try (parseRaw lang text)
159 parseRaw :: Lang -> Text -> Either Text Text
160 parseRaw lang text = do -- case result
161 let maybeResult = extractValue $ getTimeValue
162 $ parseDateWithDuckling lang text (Options True)
164 Just result -> Right result
166 -- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
167 Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> (cs . show) lang <> " :: " <> text
169 getTimeValue :: [ResolvedToken] -> Maybe Value
170 getTimeValue rt = case head rt of
173 Just x -> case rval x of
174 RVal Time t -> Just $ toJSON t
178 extractValue :: Maybe Value -> Maybe Text
179 extractValue (Just (Json.Object object)) =
180 case HM.lookup "value" object of
181 Just (Json.String date) -> Just date
183 extractValue _ = Nothing
185 -- | Current Time in DucklingTime format
186 -- TODO : get local Time in a more generic way
187 utcToDucklingTime :: UTCTime -> DucklingTime
188 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
190 -- | Local Context which depends on Lang and Time
191 localContext :: Lang -> DucklingTime -> Context
192 localContext lang dt = Context { referenceTime = dt
193 , locale = makeLocale (parserLang lang) Nothing }
195 defaultDay :: DTC.Day
196 defaultDay = DTC.fromGregorian 1 1 1
198 defaultUTCTime :: UTCTime
199 defaultUTCTime = UTCTime { utctDay = defaultDay
200 , utctDayTime = secondsToDiffTime 0 }
202 -- | Date parser with Duckling
203 parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
204 parseDateWithDuckling lang input options = do
205 let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
206 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
207 -- TODO check/test Options False or True
208 analyze input contxt options $ HashSet.fromList [(Seal Time)]