]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Date.hs
Merge remote-tracking branch 'origin/lang-parser'
[gargantext.git] / src / Gargantext / Text / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDate1 returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Text.Parsers.Date (parseDate1, parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
22
23 import Gargantext.Core (Lang(FR,EN))
24 import Gargantext.Prelude
25 import Prelude (toInteger, div, otherwise, (++))
26 --import Gargantext.Types.Main as G
27
28 import Data.Time.Clock (UTCTime, getCurrentTime)
29 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
30 import Data.Time.LocalTime (utc)
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
32 , DucklingTime(DucklingTime)
33 )
34 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
35 import qualified Duckling.Core as DC
36 import Duckling.Types (jsonValue, Entity)
37
38 import Duckling.Api (analyze, parse)
39 import qualified Data.HashSet as HashSet
40 import qualified Data.Aeson as Json
41 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
42 import Data.Time.Calendar (Day, fromGregorian)
43 import Data.Fixed (Fixed (MkFixed))
44 import Data.Foldable (length)
45 import Data.HashMap.Strict as HM hiding (map)
46
47 import Control.Monad ((=<<))
48 import Data.Either (Either)
49 import Data.String (String)
50 import Data.Text (Text, unpack)
51 -- import Duckling.Engine (parseAndResolve)
52 -- import Duckling.Rules (rulesFor)
53 -- import Duckling.Debug as DB
54
55 import Duckling.Types (ResolvedToken)
56 import Safe (headMay)
57
58 import Text.Parsec.Error (ParseError)
59 import Text.Parsec.String (Parser)
60 import Text.Parsec.Prim (Stream, ParsecT)
61 import qualified Text.ParserCombinators.Parsec (parse)
62 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
63
64 import Text.XML.HXT.DOM.Util (decimalStringToInt)
65 -- TODO add Paris at Duckling.Locale Region datatype
66 -- | To get Homogeinity of the languages
67 -- TODO : put this in a more generic place in the source code
68 parserLang :: Lang -> DC.Lang
69 parserLang FR = DC.FR
70 parserLang EN = DC.EN
71 -- parserLang _ = panic "not implemented"
72
73 -- | Final Date parser API
74 -- IO can be avoided here:
75 -- currentContext :: Lang -> IO Context
76 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
77 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
78
79 -- TODO error handling
80 parseDate1 :: Lang -> Text -> IO Text
81 parseDate1 lang text = do
82 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
83 case headMay maybeJson of
84 Just (Json.Object object) -> case HM.lookup "value" object of
85 Just (Json.String date) -> pure date
86 Just _ -> panic "ParseDate ERROR: should be a json String"
87 Nothing -> panic "ParseDate ERROR: no date found"
88 _ -> panic "ParseDate ERROR: type error"
89
90
91
92 -- | Current Time in DucklingTime format
93 -- TODO : get local Time in a more generic way
94 utcToDucklingTime :: UTCTime -> DucklingTime
95 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
96
97 -- | Local Context which depends on Lang and Time
98 localContext :: Lang -> DucklingTime -> Context
99 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
100
101 -- | Date parser with Duckling
102 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
103 parseDateWithDuckling lang input = do
104 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
105 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
106 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
107
108
109 parseDate :: Lang -> Text -> IO [Entity]
110 parseDate lang input = do
111 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
112 pure $ parse input context [(This Time)]
113
114 -- | Permit to transform a String to an Int in a monadic context
115 wrapDST :: Monad m => String -> m Int
116 wrapDST = (return . decimalStringToInt)
117
118 -- | Generic parser which take at least one element not given in argument
119 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
120 many1NoneOf = (many1 . noneOf)
121
122 getMultiplicator :: Int -> Int
123 getMultiplicator a
124 | 0 >= a = 1
125 | otherwise = 10 * (getMultiplicator $ div a 10)
126
127 -- | Parser for date format y-m-d
128 parseGregorian :: Parser Day
129 parseGregorian = do
130 y <- wrapDST =<< many1NoneOf ['-']
131 _ <- char '-'
132 m <- wrapDST =<< many1NoneOf ['-']
133 _ <- char '-'
134 d <- wrapDST =<< many1NoneOf ['T']
135 _ <- char 'T'
136 return $ fromGregorian (toInteger y) m d
137
138 -- | Parser for time format h:m:s
139 parseTimeOfDay :: Parser TimeOfDay
140 parseTimeOfDay = do
141 h <- wrapDST =<< many1NoneOf [':']
142 _ <- char ':'
143 m <- wrapDST =<< many1NoneOf [':']
144 _ <- char ':'
145 r <- many1NoneOf ['.']
146 _ <- char '.'
147 dec <- many1NoneOf ['+', '-']
148 let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
149 seconds = nb * 10^(12-l)
150 return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
151
152
153 -- | Parser for timezone format +hh:mm
154 parseTimeZone :: Parser TimeZone
155 parseTimeZone = do
156 sign <- oneOf ['+', '-']
157 h <- wrapDST =<< many1NoneOf [':']
158 _ <- char ':'
159 m <- wrapDST =<< (many1 $ anyChar)
160 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
161 in return $ TimeZone timeInMinute False "CET"
162
163 -- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
164 parseZonedTime :: Parser ZonedTime
165 parseZonedTime= do
166 d <- parseGregorian
167 tod <- parseTimeOfDay
168 tz <- parseTimeZone
169 return $ ZonedTime (LocalTime d (tod)) tz
170
171 -- | Opposite of toRFC3339
172 fromRFC3339 :: Text -> Either ParseError ZonedTime
173 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
174 where input = unpack t