2 Module : Gargantext.Core.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE TypeFamilies #-}
19 module Gargantext.Core.Text.Corpus.Parsers.Date {-(parse, parseRaw, dateSplit, Year, Month, Day)-} where
21 import Data.Aeson (toJSON, Value)
22 import Data.HashMap.Strict as HM hiding (map)
23 import Data.Text (Text, unpack, splitOn, pack)
24 import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
25 import Data.Time.Clock (UTCTime(..), getCurrentTime)
26 import Data.Time.LocalTime (utc)
27 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
28 import Duckling.Api (analyze)
29 import Duckling.Core (makeLocale, Dimension(Time))
30 import Duckling.Types (Seal(..))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
32 import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
33 import Gargantext.Core (Lang(FR,EN))
34 import Gargantext.Prelude
35 import qualified Data.Aeson as Json
36 import qualified Data.HashSet as HashSet
37 import qualified Duckling.Core as DC
39 ------------------------------------------------------------------------
40 -- | Parse date to Ints
41 -- TODO add hours, minutes and seconds
42 dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
43 dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
44 dateSplit l (Just txt) = do
45 utcTime <- parse l txt
46 let (y, m, d) = split' utcTime
47 pure (Just utcTime, (Just y, Just m,Just d))
49 split' :: UTCTime -> (Year, Month, Day)
50 split' utcTime = (fromIntegral y, m, d)
52 (UTCTime day _) = utcTime
53 (y,m,d) = toGregorian day
58 ------------------------------------------------------------------------
61 -- Parses dates mentions in full text given the language.
62 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
63 -- 1979-04-10 19:00:00 UTC
64 -- >>> parseDate EN (pack "April 10 1979")
65 -- 1979-04-10 00:00:00 UTC
66 parse :: Lang -> Text -> IO UTCTime
67 parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
69 type DateFormat = Text
70 type DateDefault = Text
72 parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
73 parseDate' format def lang s = do
74 dateStr' <- parseRaw lang s
75 let dateStr = unpack $ maybe def identity
76 $ head $ splitOn "." dateStr'
77 pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
80 -- TODO add Paris at Duckling.Locale Region datatype
81 -- | To get Homogeinity of the languages
82 -- TODO : put this in a more generic place in the source code
83 parserLang :: Lang -> DC.Lang
86 parserLang _ = panic "not implemented"
88 -- | Final Date parser API
89 -- IO can be avoided here:
90 -- currentContext :: Lang -> IO Context
91 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
92 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
94 -- TODO error handling
95 parseRaw :: Lang -> Text -> IO Text
96 parseRaw lang text = do -- case result
97 maybeResult <- extractValue <$> getTimeValue <$> parseDateWithDuckling lang text (Options True)
99 Just result -> pure result
100 Nothing -> panic $ "[G.C.T.C.P.D.parseRaw] ERROR" <> (pack . show) lang <> " " <> text
103 getTimeValue :: [ResolvedToken] -> Value
104 getTimeValue rt = case head rt of
105 Nothing -> panic "error"
106 Just x -> case rval x of
107 RVal Time t -> toJSON t
110 extractValue :: Value -> Maybe Text
111 extractValue (Json.Object object) =
112 case HM.lookup "value" object of
113 Just (Json.String date) -> Just date
115 extractValue _ = Nothing
117 -- | Current Time in DucklingTime format
118 -- TODO : get local Time in a more generic way
119 utcToDucklingTime :: UTCTime -> DucklingTime
120 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
122 -- | Local Context which depends on Lang and Time
123 localContext :: Lang -> DucklingTime -> Context
124 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
126 -- | Date parser with Duckling
127 parseDateWithDuckling :: Lang -> Text -> Options -> IO [ResolvedToken]
128 parseDateWithDuckling lang input options = do
129 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
130 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
131 -- TODO check/test Options False or True
132 pure $ analyze input contxt options $ HashSet.fromList [(Seal Time)]