]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Parsers/Date.hs
[FEAT/Tokenizer] adding a great tokenizer thanks to Grzegorz Chrupała.
[gargantext.git] / src / Gargantext / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDate1 returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
22
23 import Gargantext.Prelude
24 import Prelude (toInteger, div, otherwise, (++))
25 --import Gargantext.Types.Main as G
26
27 import Data.Time.Clock (UTCTime, getCurrentTime)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Data.Time.LocalTime (utc)
30 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
31 , DucklingTime(DucklingTime)
32 )
33 import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
34 import Duckling.Types (jsonValue, Entity)
35
36 import Duckling.Api (analyze, parse)
37 import qualified Data.HashSet as HashSet
38 import qualified Data.Aeson as Json
39 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
40 import Data.Time.Calendar (Day, fromGregorian)
41 import Data.Fixed (Fixed (MkFixed))
42 import Data.Foldable (length)
43 import Data.HashMap.Strict as HM hiding (map)
44
45 import Control.Monad ((=<<))
46 import Data.Either (Either)
47 import Data.String (String)
48 import Data.Text (Text, unpack)
49 -- import Duckling.Engine (parseAndResolve)
50 -- import Duckling.Rules (rulesFor)
51 -- import Duckling.Debug as DB
52
53 import Duckling.Types (ResolvedToken)
54 import Safe (headMay)
55
56 import Text.Parsec.Error (ParseError)
57 import Text.Parsec.String (Parser)
58 import Text.Parsec.Prim (Stream, ParsecT)
59 import qualified Text.ParserCombinators.Parsec (parse)
60 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
61
62 import Text.XML.HXT.DOM.Util (decimalStringToInt)
63 -- TODO add Paris at Duckling.Locale Region datatype
64 -- | To get Homogeinity of the languages
65 -- TODO : put this in a more generic place in the source code
66 --parserLang :: G.Language -> Lang
67 --parserLang G.FR = FR
68 --parserLang G.EN = EN
69
70
71 -- | Final Date parser API
72 -- IO can be avoided here:
73 -- currentContext :: Lang -> IO Context
74 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
75 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
76
77 parseDate1 :: Lang -> Text -> IO Text
78 parseDate1 lang text = do
79 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
80 case headMay maybeJson of
81 Just (Json.Object object) -> case HM.lookup "value" object of
82 Just (Json.String date) -> pure date
83 Just _ -> panic "ParseDate ERROR: should be a json String"
84 Nothing -> panic "ParseDate ERROR: no date found"
85 _ -> panic "ParseDate ERROR: type error"
86
87
88
89 -- | Current Time in DucklingTime format
90 -- TODO : get local Time in a more generic way
91 utcToDucklingTime :: UTCTime -> DucklingTime
92 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
93
94 -- | Local Context which depends on Lang and Time
95 localContext :: Lang -> DucklingTime -> Context
96 localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
97
98 -- | Date parser with Duckling
99 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
100 parseDateWithDuckling lang input = do
101 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
102 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
103 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
104
105
106 parseDate :: Lang -> Text -> IO [Entity]
107 parseDate lang input = do
108 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
109 pure $ parse input context [(This Time)]
110
111 -- | Permit to transform a String to an Int in a monadic context
112 wrapDST :: Monad m => String -> m Int
113 wrapDST = (return . decimalStringToInt)
114
115 -- | Generic parser which take at least one element not given in argument
116 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
117 many1NoneOf = (many1 . noneOf)
118
119 getMultiplicator :: Int -> Int
120 getMultiplicator a
121 | 0 >= a = 1
122 | otherwise = 10 * (getMultiplicator $ div a 10)
123
124 -- | Parser for date format y-m-d
125 parseGregorian :: Parser Day
126 parseGregorian = do
127 y <- wrapDST =<< many1NoneOf ['-']
128 _ <- char '-'
129 m <- wrapDST =<< many1NoneOf ['-']
130 _ <- char '-'
131 d <- wrapDST =<< many1NoneOf ['T']
132 _ <- char 'T'
133 return $ fromGregorian (toInteger y) m d
134
135 -- | Parser for time format h:m:s
136 parseTimeOfDay :: Parser TimeOfDay
137 parseTimeOfDay = do
138 h <- wrapDST =<< many1NoneOf [':']
139 _ <- char ':'
140 m <- wrapDST =<< many1NoneOf [':']
141 _ <- char ':'
142 r <- many1NoneOf ['.']
143 _ <- char '.'
144 dec <- many1NoneOf ['+', '-']
145 let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
146 seconds = nb * 10^(12-l)
147 return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
148
149
150 -- | Parser for timezone format +hh:mm
151 parseTimeZone :: Parser TimeZone
152 parseTimeZone = do
153 sign <- oneOf ['+', '-']
154 h <- wrapDST =<< many1NoneOf [':']
155 _ <- char ':'
156 m <- wrapDST =<< (many1 $ anyChar)
157 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
158 in return $ TimeZone timeInMinute False "CET"
159
160 -- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
161 parseZonedTime :: Parser ZonedTime
162 parseZonedTime= do
163 d <- parseGregorian
164 tod <- parseTimeOfDay
165 tz <- parseTimeZone
166 return $ ZonedTime (LocalTime d (tod)) tz
167
168 -- | Opposite of toRFC3339
169 fromRFC3339 :: Text -> Either ParseError ZonedTime
170 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
171 where input = unpack t