2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Text.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
21 module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn, pack)
25 import Data.Time (parseTimeOrError, defaultTimeLocale)
26 import Data.Time.Clock (UTCTime, getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
40 ------------------------------------------------------------------------
42 -- Parses dates mentions in full text given the language.
43 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
44 -- 1979-04-10 19:00:00 UTC
45 -- >>> parseDate EN (pack "April 10 1979")
46 -- 1979-04-10 00:00:00 UTC
47 parseDate :: Lang -> Text -> IO UTCTime
48 parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
50 type DateFormat = Text
51 type DateDefault = Text
53 parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
54 parseDate' format def lang s = do
55 dateStr' <- parseDateRaw lang s
56 let dateStr = unpack $ maybe def identity
57 $ head $ splitOn "." dateStr'
58 pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
61 -- TODO add Paris at Duckling.Locale Region datatype
62 -- | To get Homogeinity of the languages
63 -- TODO : put this in a more generic place in the source code
64 parserLang :: Lang -> DC.Lang
67 -- parserLang _ = panic "not implemented"
69 -- | Final Date parser API
70 -- IO can be avoided here:
71 -- currentContext :: Lang -> IO Context
72 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
73 -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
75 -- TODO error handling
76 parseDateRaw :: Lang -> Text -> IO (Text)
77 parseDateRaw lang text = do
78 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
79 case headMay maybeJson of
80 Just (Json.Object object) -> case HM.lookup "value" object of
81 Just (Json.String date) -> pure date
82 Just _ -> panic "ParseDateRaw ERROR: should be a json String"
83 Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
85 _ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text
88 -- | Current Time in DucklingTime format
89 -- TODO : get local Time in a more generic way
90 utcToDucklingTime :: UTCTime -> DucklingTime
91 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
93 -- | Local Context which depends on Lang and Time
94 localContext :: Lang -> DucklingTime -> Context
95 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
97 -- | Date parser with Duckling
98 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
99 parseDateWithDuckling lang input = do
100 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
101 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
102 pure $ analyze input contxt $ HashSet.fromList [(This Time)]