2 Module : Gargantext.Core.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 According to the language of the text, parseDateRaw returns date as Text:
13 import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
17 {-# LANGUAGE TypeFamilies #-}
19 module Gargantext.Core.Text.Corpus.Parsers.Date
20 {-(parse, parseRaw, dateSplit, Year, Month, Day)-}
23 import Data.Aeson (toJSON, Value)
24 import Data.Either (Either(..))
25 import Data.HashMap.Strict as HM hiding (map)
26 import Data.Text (Text, unpack, splitOn)
27 import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
28 import qualified Data.Time.Calendar as DTC
29 import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
30 import Data.Time.LocalTime (utc)
31 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
32 import Duckling.Api (analyze)
33 import Duckling.Core (makeLocale, Dimension(Time))
34 import Duckling.Types (Seal(..))
35 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
36 import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
37 import Gargantext.Core (Lang(FR,EN))
38 import Gargantext.Prelude
39 import qualified Data.Aeson as Json
40 import qualified Data.HashSet as HashSet
41 import qualified Duckling.Core as DC
43 ------------------------------------------------------------------------
44 -- | Parse date to Ints
45 -- TODO add hours, minutes and seconds
46 dateSplit :: Lang -> Maybe Text -> (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
47 dateSplit _ Nothing = (Nothing, (Nothing, Nothing, Nothing))
48 dateSplit l (Just txt) = do
49 let utcTime = parse l txt
50 let (y, m, d) = split' utcTime
51 (Just utcTime, (Just y, Just m,Just d))
53 split' :: UTCTime -> (Year, Month, Day)
54 split' utcTime = (fromIntegral y, m, d)
56 (UTCTime day _) = utcTime
57 (y,m,d) = toGregorian day
62 ------------------------------------------------------------------------
65 -- Parses dates mentions in full text given the language.
66 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
67 -- 1979-04-10 19:00:00 UTC
68 -- >>> parseDate EN (pack "April 10 1979")
69 -- 1979-04-10 00:00:00 UTC
70 parse :: Lang -> Text -> UTCTime
71 parse = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0"
73 type DateFormat = Text
74 type DateDefault = Text
76 parseDate' :: DateFormat
81 parseDate' format def lang s = do
82 let dateStr' = parseRaw lang s
84 Left _err -> defaultUTCTime
85 Right "" -> defaultUTCTime
91 parseTimeOrError True defaultTimeLocale (unpack format) dateStr
94 -- TODO add Paris at Duckling.Locale Region datatype
95 -- | To get Homogeinity of the languages
96 -- TODO : put this in a more generic place in the source code
97 parserLang :: Lang -> DC.Lang
100 parserLang _ = panic "not implemented"
102 -- | Final Date parser API
103 -- IO can be avoided here:
104 -- currentContext :: Lang -> IO Context
105 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
106 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
108 parseRaw :: Lang -> Text -> Either Text Text
109 parseRaw lang text = do -- case result
110 let maybeResult = extractValue $ getTimeValue
111 $ parseDateWithDuckling lang text (Options True)
113 Just result -> Right result
115 -- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
116 Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> (cs . show) lang <> " :: " <> text
118 getTimeValue :: [ResolvedToken] -> Maybe Value
119 getTimeValue rt = case head rt of
122 Just x -> case rval x of
123 RVal Time t -> Just $ toJSON t
127 extractValue :: Maybe Value -> Maybe Text
128 extractValue (Just (Json.Object object)) =
129 case HM.lookup "value" object of
130 Just (Json.String date) -> Just date
132 extractValue _ = Nothing
134 -- | Current Time in DucklingTime format
135 -- TODO : get local Time in a more generic way
136 utcToDucklingTime :: UTCTime -> DucklingTime
137 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
139 -- | Local Context which depends on Lang and Time
140 localContext :: Lang -> DucklingTime -> Context
141 localContext lang dt = Context { referenceTime = dt
142 , locale = makeLocale (parserLang lang) Nothing }
144 defaultDay :: DTC.Day
145 defaultDay = DTC.fromGregorian 1 1 1
147 defaultUTCTime :: UTCTime
148 defaultUTCTime = UTCTime { utctDay = defaultDay
149 , utctDayTime = secondsToDiffTime 0 }
151 -- | Date parser with Duckling
152 parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
153 parseDateWithDuckling lang input options = do
154 let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
155 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
156 -- TODO check/test Options False or True
157 analyze input contxt options $ HashSet.fromList [(Seal Time)]