2 Module : Gargantext.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
21 module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, dateSplit, Year, Month, Day) where
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn, pack)
25 import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
26 import Data.Time.Clock (UTCTime(..), getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
40 ------------------------------------------------------------------------
41 -- | Parse date to Ints
42 -- TODO add hours, minutes and seconds
43 dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
44 dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
45 dateSplit l (Just txt) = do
46 utcTime <- parse l txt
47 let (y, m, d) = split' utcTime
48 pure (Just utcTime, (Just y, Just m,Just d))
50 split' :: UTCTime -> (Year, Month, Day)
51 split' utcTime = (fromIntegral y, m, d)
53 (UTCTime day _) = utcTime
54 (y,m,d) = toGregorian day
59 ------------------------------------------------------------------------
62 -- Parses dates mentions in full text given the language.
63 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
64 -- 1979-04-10 19:00:00 UTC
65 -- >>> parseDate EN (pack "April 10 1979")
66 -- 1979-04-10 00:00:00 UTC
67 parse :: Lang -> Text -> IO UTCTime
68 parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
70 type DateFormat = Text
71 type DateDefault = Text
73 parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
74 parseDate' format def lang s = do
75 dateStr' <- parseRaw lang s
76 let dateStr = unpack $ maybe def identity
77 $ head $ splitOn "." dateStr'
78 pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
81 -- TODO add Paris at Duckling.Locale Region datatype
82 -- | To get Homogeinity of the languages
83 -- TODO : put this in a more generic place in the source code
84 parserLang :: Lang -> DC.Lang
87 parserLang _ = panic "not implemented"
89 -- | Final Date parser API
90 -- IO can be avoided here:
91 -- currentContext :: Lang -> IO Context
92 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
93 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
95 -- TODO error handling
96 parseRaw :: Lang -> Text -> IO (Text)
97 parseRaw lang text = do
98 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
99 case headMay maybeJson of
100 Just (Json.Object object) -> case HM.lookup "value" object of
101 Just (Json.String date) -> pure date
102 Just _ -> panic "ParseRaw ERROR: should be a json String"
103 Nothing -> panic $ "ParseRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
105 _ -> panic $ "ParseRaw ERROR: type error" <> (pack . show) lang <> " " <> text
108 -- | Current Time in DucklingTime format
109 -- TODO : get local Time in a more generic way
110 utcToDucklingTime :: UTCTime -> DucklingTime
111 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
113 -- | Local Context which depends on Lang and Time
114 localContext :: Lang -> DucklingTime -> Context
115 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
117 -- | Date parser with Duckling
118 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
119 parseDateWithDuckling lang input = do
120 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
121 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
122 pure $ analyze input contxt $ HashSet.fromList [(This Time)]