]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Date.hs
[FIX] multiterms FR extraction.
[gargantext.git] / src / Gargantext / Text / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDate1 returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Parsers.Date as DGP
14 DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Text.Parsers.Date (parseDate1, parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
22
23 import Gargantext.Core (Lang(FR,EN))
24 import Gargantext.Prelude
25 import Prelude (toInteger, div, otherwise, (++))
26 --import Gargantext.Types.Main as G
27
28 import Data.Time.Clock (UTCTime, getCurrentTime)
29 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
30 import Data.Time.LocalTime (utc)
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
32 , DucklingTime(DucklingTime)
33 )
34 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
35 import qualified Duckling.Core as DC
36 import Duckling.Types (jsonValue, Entity)
37
38 import Duckling.Api (analyze, parse)
39 import qualified Data.HashSet as HashSet
40 import qualified Data.Aeson as Json
41 import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
42 import Data.Time.Calendar (Day, fromGregorian)
43 import Data.Fixed (Fixed (MkFixed))
44 import Data.Foldable (length)
45 import Data.HashMap.Strict as HM hiding (map)
46
47 import Control.Monad ((=<<))
48 import Data.Either (Either)
49 import Data.String (String)
50 import Data.Text (Text, unpack)
51 -- import Duckling.Engine (parseAndResolve)
52 -- import Duckling.Rules (rulesFor)
53 -- import Duckling.Debug as DB
54
55 import Duckling.Types (ResolvedToken)
56 import Safe (headMay)
57
58 import Text.Parsec.Error (ParseError)
59 import Text.Parsec.String (Parser)
60 import Text.Parsec.Prim (Stream, ParsecT)
61 import qualified Text.ParserCombinators.Parsec (parse)
62 import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
63
64 import Text.XML.HXT.DOM.Util (decimalStringToInt)
65 -- TODO add Paris at Duckling.Locale Region datatype
66 -- | To get Homogeinity of the languages
67 -- TODO : put this in a more generic place in the source code
68 parserLang :: Lang -> DC.Lang
69 parserLang FR = DC.FR
70 parserLang EN = DC.EN
71
72
73 -- | Final Date parser API
74 -- IO can be avoided here:
75 -- currentContext :: Lang -> IO Context
76 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
77 -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
78
79 parseDate1 :: Lang -> Text -> IO Text
80 parseDate1 lang text = do
81 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
82 case headMay maybeJson of
83 Just (Json.Object object) -> case HM.lookup "value" object of
84 Just (Json.String date) -> pure date
85 Just _ -> panic "ParseDate ERROR: should be a json String"
86 Nothing -> panic "ParseDate ERROR: no date found"
87 _ -> panic "ParseDate ERROR: type error"
88
89
90
91 -- | Current Time in DucklingTime format
92 -- TODO : get local Time in a more generic way
93 utcToDucklingTime :: UTCTime -> DucklingTime
94 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
95
96 -- | Local Context which depends on Lang and Time
97 localContext :: Lang -> DucklingTime -> Context
98 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
99
100 -- | Date parser with Duckling
101 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
102 parseDateWithDuckling lang input = do
103 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
104 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
105 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
106
107
108 parseDate :: Lang -> Text -> IO [Entity]
109 parseDate lang input = do
110 context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
111 pure $ parse input context [(This Time)]
112
113 -- | Permit to transform a String to an Int in a monadic context
114 wrapDST :: Monad m => String -> m Int
115 wrapDST = (return . decimalStringToInt)
116
117 -- | Generic parser which take at least one element not given in argument
118 many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
119 many1NoneOf = (many1 . noneOf)
120
121 getMultiplicator :: Int -> Int
122 getMultiplicator a
123 | 0 >= a = 1
124 | otherwise = 10 * (getMultiplicator $ div a 10)
125
126 -- | Parser for date format y-m-d
127 parseGregorian :: Parser Day
128 parseGregorian = do
129 y <- wrapDST =<< many1NoneOf ['-']
130 _ <- char '-'
131 m <- wrapDST =<< many1NoneOf ['-']
132 _ <- char '-'
133 d <- wrapDST =<< many1NoneOf ['T']
134 _ <- char 'T'
135 return $ fromGregorian (toInteger y) m d
136
137 -- | Parser for time format h:m:s
138 parseTimeOfDay :: Parser TimeOfDay
139 parseTimeOfDay = do
140 h <- wrapDST =<< many1NoneOf [':']
141 _ <- char ':'
142 m <- wrapDST =<< many1NoneOf [':']
143 _ <- char ':'
144 r <- many1NoneOf ['.']
145 _ <- char '.'
146 dec <- many1NoneOf ['+', '-']
147 let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
148 seconds = nb * 10^(12-l)
149 return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
150
151
152 -- | Parser for timezone format +hh:mm
153 parseTimeZone :: Parser TimeZone
154 parseTimeZone = do
155 sign <- oneOf ['+', '-']
156 h <- wrapDST =<< many1NoneOf [':']
157 _ <- char ':'
158 m <- wrapDST =<< (many1 $ anyChar)
159 let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
160 in return $ TimeZone timeInMinute False "CET"
161
162 -- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
163 parseZonedTime :: Parser ZonedTime
164 parseZonedTime= do
165 d <- parseGregorian
166 tod <- parseTimeOfDay
167 tz <- parseTimeZone
168 return $ ZonedTime (LocalTime d (tod)) tz
169
170 -- | Opposite of toRFC3339
171 fromRFC3339 :: Text -> Either ParseError ZonedTime
172 fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
173 where input = unpack t