]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Date.hs
[DATE] parser -> UTCTime
[gargantext.git] / src / Gargantext / Text / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDateRaw returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Text.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
22
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn)
25 import Data.Time (parseTimeOrError, defaultTimeLocale)
26 import Data.Time.Clock (UTCTime, getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
39
40 -- | Unused import (to parse Date Format, keeping it for maybe next steps)
41 -- import Control.Monad ((=<<))
42 -- import Data.Either (Either)
43 -- import Data.Fixed (Fixed (MkFixed))
44 -- import Data.Foldable (length)
45 -- import Data.String (String)
46 -- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
47 -- import Data.Time.Calendar (Day, fromGregorian)
48 -- import Duckling.Debug as DB
49 -- import Duckling.Engine (parseAndResolve)
50 -- import Duckling.Rules (rulesFor)
51 -- import Prelude (toInteger, div, otherwise, (++))
52 -- import Text.Parsec.Error (ParseError)
53 -- import Text.Parsec.Prim (Stream, ParsecT)
54 -- import Text.Parsec.String (Parser)
55 -- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
56 -- import Text.XML.HXT.DOM.Util (decimalStringToInt)
57 -- import qualified Text.ParserCombinators.Parsec (parse)
58
59 ------------------------------------------------------------------------
60 parseDate :: Lang -> Text -> IO UTCTime
61 parseDate lang s = do
62 dateStr' <- parseDateRaw lang s
63 let format = "%Y-%m-%dT%T"
64 let dateStr = unpack $ maybe "0-0-0T0:0:0" identity
65 $ head $ splitOn "." dateStr'
66 pure $ parseTimeOrError True defaultTimeLocale format dateStr
67
68
69
70 -- TODO add Paris at Duckling.Locale Region datatype
71 -- | To get Homogeinity of the languages
72 -- TODO : put this in a more generic place in the source code
73 parserLang :: Lang -> DC.Lang
74 parserLang FR = DC.FR
75 parserLang EN = DC.EN
76 -- parserLang _ = panic "not implemented"
77
78 -- | Final Date parser API
79 -- IO can be avoided here:
80 -- currentContext :: Lang -> IO Context
81 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
82 -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
83
84 -- TODO error handling
85 parseDateRaw :: Lang -> Text -> IO Text
86 parseDateRaw lang text = do
87 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
88 case headMay maybeJson of
89 Just (Json.Object object) -> case HM.lookup "value" object of
90 Just (Json.String date) -> pure date
91 Just _ -> panic "ParseDateRaw ERROR: should be a json String"
92 Nothing -> panic "ParseDateRaw ERROR: no date found"
93 _ -> panic "ParseDateRaw ERROR: type error"
94
95
96 -- | Current Time in DucklingTime format
97 -- TODO : get local Time in a more generic way
98 utcToDucklingTime :: UTCTime -> DucklingTime
99 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
100
101 -- | Local Context which depends on Lang and Time
102 localContext :: Lang -> DucklingTime -> Context
103 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
104
105 -- | Date parser with Duckling
106 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
107 parseDateWithDuckling lang input = do
108 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
109 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
110 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
111
112 -- | Permit to transform a String to an Int in a monadic context
113 --wrapDST :: Monad m => String -> m Int
114 --wrapDST = (return . decimalStringToInt)
115
116 -- | Generic parser which take at least one element not given in argument
117 --many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
118 --many1NoneOf = (many1 . noneOf)
119
120 --getMultiplicator :: Int -> Int
121 --getMultiplicator a
122 -- | 0 >= a = 1
123 -- | otherwise = 10 * (getMultiplicator $ div a 10)
124
125 -- | Parser for date format y-m-d
126 --parseGregorian :: Parser Day
127 --parseGregorian = do
128 -- y <- wrapDST =<< many1NoneOf ['-']
129 -- _ <- char '-'
130 -- m <- wrapDST =<< many1NoneOf ['-']
131 -- _ <- char '-'
132 -- d <- wrapDST =<< many1NoneOf ['T']
133 -- _ <- char 'T'
134 -- return $ fromGregorian (toInteger y) m d
135 --
136 ---- | Parser for time format h:m:s
137 --parseTimeOfDay :: Parser TimeOfDay
138 --parseTimeOfDay = do
139 -- h <- wrapDST =<< many1NoneOf [':']
140 -- _ <- char ':'
141 -- m <- wrapDST =<< many1NoneOf [':']
142 -- _ <- char ':'
143 -- r <- many1NoneOf ['.']
144 -- _ <- char '.'
145 -- dec <- many1NoneOf ['+', '-']
146 -- let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
147 -- seconds = nb * 10^(12-l)
148 -- return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
149 --
150 --
151 -- | Parser for timezone format +hh:mm
152 --parseTimeZone :: Parser TimeZone
153 --parseTimeZone = do
154 -- sign <- oneOf ['+', '-']
155 -- h <- wrapDST =<< many1NoneOf [':']
156 -- _ <- char ':'
157 -- m <- wrapDST =<< (many1 $ anyChar)
158 -- let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
159 -- in return $ TimeZone timeInMinute False "CET"
160 --
161 ---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
162 --parseZonedTime :: Parser ZonedTime
163 --parseZonedTime= do
164 -- d <- parseGregorian
165 -- tod <- parseTimeOfDay
166 -- tz <- parseTimeZone
167 -- return $ ZonedTime (LocalTime d (tod)) tz
168 --
169 ---- | Opposite of toRFC3339
170 --fromRFC3339 :: Text -> Either ParseError ZonedTime
171 --fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
172 -- where input = unpack t