2 Module : Gargantext.Core.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE TypeFamilies #-}
19 module Gargantext.Core.Text.Corpus.Parsers.Date
20 {-(parse, parseRaw, dateSplit, Year, Month, Day)-}
23 import Data.Aeson (toJSON, Value)
24 import Data.HashMap.Strict as HM hiding (map)
25 import Data.Text (Text, unpack, splitOn)
26 import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
27 import Data.Time.Clock (UTCTime(..), getCurrentTime)
28 import Data.Time.LocalTime (utc)
29 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
30 import Duckling.Api (analyze)
31 import Duckling.Core (makeLocale, Dimension(Time))
32 import Duckling.Types (Seal(..))
33 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
34 import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
35 import Gargantext.Core (Lang(FR,EN))
36 import Gargantext.Prelude
37 import qualified Data.Aeson as Json
38 import qualified Data.HashSet as HashSet
39 import qualified Duckling.Core as DC
41 ------------------------------------------------------------------------
42 -- | Parse date to Ints
43 -- TODO add hours, minutes and seconds
44 dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
45 dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
46 dateSplit l (Just txt) = do
47 utcTime <- parse l txt
48 let (y, m, d) = split' utcTime
49 pure (Just utcTime, (Just y, Just m,Just d))
51 split' :: UTCTime -> (Year, Month, Day)
52 split' utcTime = (fromIntegral y, m, d)
54 (UTCTime day _) = utcTime
55 (y,m,d) = toGregorian day
60 ------------------------------------------------------------------------
63 -- Parses dates mentions in full text given the language.
64 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
65 -- 1979-04-10 19:00:00 UTC
66 -- >>> parseDate EN (pack "April 10 1979")
67 -- 1979-04-10 00:00:00 UTC
68 parse :: Lang -> Text -> IO UTCTime
69 parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
71 type DateFormat = Text
72 type DateDefault = Text
74 parseDate' :: DateFormat
79 parseDate' format def lang s = do
80 dateStr' <- parseRaw lang s
87 $ splitOn "." dateStr'
88 pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
91 -- TODO add Paris at Duckling.Locale Region datatype
92 -- | To get Homogeinity of the languages
93 -- TODO : put this in a more generic place in the source code
94 parserLang :: Lang -> DC.Lang
97 parserLang _ = panic "not implemented"
99 -- | Final Date parser API
100 -- IO can be avoided here:
101 -- currentContext :: Lang -> IO Context
102 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
103 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
105 parseRaw :: Lang -> Text -> IO Text
106 parseRaw lang text = do -- case result
107 maybeResult <- extractValue <$> getTimeValue
108 <$> parseDateWithDuckling lang text (Options True)
110 Just result -> pure result
112 printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang)
116 getTimeValue :: [ResolvedToken] -> Maybe Value
117 getTimeValue rt = case head rt of
120 Just x -> case rval x of
121 RVal Time t -> Just $ toJSON t
125 extractValue :: Maybe Value -> Maybe Text
126 extractValue (Just (Json.Object object)) =
127 case HM.lookup "value" object of
128 Just (Json.String date) -> Just date
130 extractValue _ = Nothing
132 -- | Current Time in DucklingTime format
133 -- TODO : get local Time in a more generic way
134 utcToDucklingTime :: UTCTime -> DucklingTime
135 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
137 -- | Local Context which depends on Lang and Time
138 localContext :: Lang -> DucklingTime -> Context
139 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
141 -- | Date parser with Duckling
142 parseDateWithDuckling :: Lang -> Text -> Options -> IO [ResolvedToken]
143 parseDateWithDuckling lang input options = do
144 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
145 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
146 -- TODO check/test Options False or True
147 pure $ analyze input contxt options $ HashSet.fromList [(Seal Time)]