]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Parsers/Date.hs
Add comments
[gargantext.git] / src / Gargantext / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDate1 returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE FlexibleContexts #-}
19
20 module Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate) where
21
22 import Gargantext.Prelude
23 --import Gargantext.Types.Main as G
24
25 import Data.Time.Clock (UTCTime, getCurrentTime)
26 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
27 import Data.Time.LocalTime (utc)
28 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
29 , DucklingTime(DucklingTime)
30 )
31 import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
32 import Duckling.Types (jsonValue, Entity)
33
34 import Duckling.Api (analyze, parse)
35 import qualified Data.HashSet as HashSet
36 import qualified Data.Aeson as Json
37 import Data.HashMap.Strict as HM
38 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..), getCurrentTimeZone)
39 import Data.Time.Calendar (Day, fromGregorian)
40 import Data.Fixed (Fixed (MkFixed))
41
42 import Data.Text (Text, unpack)
43 -- import Duckling.Engine (parseAndResolve)
44 -- import Duckling.Rules (rulesFor)
45 -- import Duckling.Debug as DB
46
47 import Duckling.Types (ResolvedToken)
48 import Safe (headMay)
49 import System.IO.Unsafe (unsafePerformIO)
50
51 import Text.Parsec.Error (ParseError)
52 import Text.Parsec.String (Parser)
53 import Text.Parsec.Prim (Stream, ParsecT)
54 import qualified Text.ParserCombinators.Parsec (parse)
55 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
56
57 import Text.XML.HXT.DOM.Util (decimalStringToInt)
58 -- TODO add Paris at Duckling.Locale Region datatype
59 -- | To get Homogeinity of the languages
60 -- TODO : put this in a more generic place in the source code
61 --parserLang :: G.Language -> Lang
62 --parserLang G.FR = FR
63 --parserLang G.EN = EN
64
65
66 -- | Final Date parser API
67 -- IO can be avoided here:
68 -- currentContext :: Lang -> IO Context
69 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
70 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
71 parseDate1 :: Lang -> Text -> IO Text
72 parseDate1 lang text = do
73 maybeJson <- pm jsonValue <$> parseDateWithDuckling lang text
74 case headMay maybeJson of
75 Just (Json.Object object) -> case HM.lookup "value" object of
76 Just (Json.String date) -> pure date
77 Just _ -> error "ParseDate ERROR: should be a json String"
78 Nothing -> error "ParseDate ERROR: no date found"
79 _ -> error "ParseDate ERROR: type error"
80
81
82
83 -- | Current Time in DucklingTime format
84 -- TODO : get local Time in a more generic way
85 utcToDucklingTime :: UTCTime -> DucklingTime
86 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
87
88 -- | Local Context which depends on Lang and Time
89 localContext :: Lang -> DucklingTime -> Context
90 localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
91
92 -- | Date parser with Duckling
93 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
94 parseDateWithDuckling lang input = do
95 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
96 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
97 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
98
99
100 parseDate :: Lang -> Text -> IO [Entity]
101 parseDate lang input = do
102 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
103 pure $ parse input context [(This Time)]
104
105 -- | Permit to transform a String to an Int in a monadic context
106 wrapDST :: Monad m => String -> m Int
107 wrapDST = (return . decimalStringToInt)
108
109 -- | Generic parser which take at least one element not given in argument
110 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
111 many1NoneOf = (many1 . noneOf)
112
113 -- | Parser for date format y-m-d
114 parseGregorian :: Parser Day
115 parseGregorian = do
116 y <- wrapDST =<< many1NoneOf ['-']
117 _ <- char '-'
118 m <- wrapDST =<< many1NoneOf ['-']
119 _ <- char '-'
120 d <- wrapDST =<< many1NoneOf ['T']
121 _ <- char 'T'
122 return $ fromGregorian (toInteger y) m d
123
124 -- | Parser for time format h:m:s
125 parseTimeOfDay :: Parser TimeOfDay
126 parseTimeOfDay = do
127 h <- wrapDST =<< many1NoneOf [':']
128 _ <- char ':'
129 m <- wrapDST =<< many1NoneOf [':']
130 _ <- char ':'
131 s <- wrapDST =<< many1NoneOf ['+', '-']
132 return $ TimeOfDay h m (MkFixed $ toInteger s)
133
134 -- | Parser for timezone format +hh:mm
135 parseTimeZone :: Parser TimeZone
136 parseTimeZone = do
137 sign <- oneOf ['+', '-']
138 h <- wrapDST =<< many1NoneOf [':']
139 _ <- char ':'
140 m <- wrapDST =<< (many1 $ anyChar)
141 let (TimeZone _ s n) = unsafePerformIO getCurrentTimeZone
142 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
143 in return $ TimeZone timeInMinute s n
144
145 -- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
146 parseZonedTime :: Parser ZonedTime
147 parseZonedTime= do
148 d <- parseGregorian
149 tod <- parseTimeOfDay
150 tz <- parseTimeZone
151 return $ ZonedTime (LocalTime d (tod)) tz
152
153 -- | Opposite of toRFC3339
154 fromRFC3339 :: Text -> Either ParseError ZonedTime
155 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
156 where input = unpack t