]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Date.hs
Merge branch 'dev-ngrams-repo' of ssh://delanoe.org/haskell-gargantext into dev-ngram...
[gargantext.git] / src / Gargantext / Text / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDateRaw returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Text.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
22
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn, pack)
25 import Data.Time (parseTimeOrError, defaultTimeLocale)
26 import Data.Time.Clock (UTCTime, getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
39
40 -- | Unused import (to parse Date Format, keeping it for maybe next steps)
41 -- import Control.Monad ((=<<))
42 -- import Data.Either (Either)
43 -- import Data.Fixed (Fixed (MkFixed))
44 -- import Data.Foldable (length)
45 -- import Data.String (String)
46 -- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
47 -- import Data.Time.Calendar (Day, fromGregorian)
48 -- import Duckling.Debug as DB
49 -- import Duckling.Engine (parseAndResolve)
50 -- import Duckling.Rules (rulesFor)
51 -- import Prelude (toInteger, div, otherwise, (++))
52 -- import Text.Parsec.Error (ParseError)
53 -- import Text.Parsec.Prim (Stream, ParsecT)
54 -- import Text.Parsec.String (Parser)
55 -- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
56 -- import Text.XML.HXT.DOM.Util (decimalStringToInt)
57 -- import qualified Text.ParserCombinators.Parsec (parse)
58
59 ------------------------------------------------------------------------
60 -- | Date Parser
61 -- Parses dates mentions in full text given the language.
62 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
63 -- 1979-04-10 19:00:00 UTC
64 -- >>> parseDate EN (pack "April 10 1979")
65 -- 1979-04-10 00:00:00 UTC
66 parseDate :: Lang -> Text -> IO UTCTime
67 parseDate lang s = do
68 dateStr' <- parseDateRaw lang s
69 let format = "%Y-%m-%dT%T"
70 let dateStr = unpack $ maybe "0-0-0T0:0:0" identity
71 $ head $ splitOn "." dateStr'
72 pure $ parseTimeOrError True defaultTimeLocale format dateStr
73
74
75
76 -- TODO add Paris at Duckling.Locale Region datatype
77 -- | To get Homogeinity of the languages
78 -- TODO : put this in a more generic place in the source code
79 parserLang :: Lang -> DC.Lang
80 parserLang FR = DC.FR
81 parserLang EN = DC.EN
82 -- parserLang _ = panic "not implemented"
83
84 -- | Final Date parser API
85 -- IO can be avoided here:
86 -- currentContext :: Lang -> IO Context
87 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
88 -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
89
90 -- TODO error handling
91 parseDateRaw :: Lang -> Text -> IO (Text)
92 parseDateRaw lang text = do
93 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
94 case headMay maybeJson of
95 Just (Json.Object object) -> case HM.lookup "value" object of
96 Just (Json.String date) -> pure date
97 Just _ -> panic "ParseDateRaw ERROR: should be a json String"
98 Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
99
100 _ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text
101
102
103 -- | Current Time in DucklingTime format
104 -- TODO : get local Time in a more generic way
105 utcToDucklingTime :: UTCTime -> DucklingTime
106 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
107
108 -- | Local Context which depends on Lang and Time
109 localContext :: Lang -> DucklingTime -> Context
110 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
111
112 -- | Date parser with Duckling
113 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
114 parseDateWithDuckling lang input = do
115 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
116 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
117 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
118
119 -- | Permit to transform a String to an Int in a monadic context
120 --wrapDST :: Monad m => String -> m Int
121 --wrapDST = (return . decimalStringToInt)
122
123 -- | Generic parser which take at least one element not given in argument
124 --many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
125 --many1NoneOf = (many1 . noneOf)
126
127 --getMultiplicator :: Int -> Int
128 --getMultiplicator a
129 -- | 0 >= a = 1
130 -- | otherwise = 10 * (getMultiplicator $ div a 10)
131
132 -- | Parser for date format y-m-d
133 --parseGregorian :: Parser Day
134 --parseGregorian = do
135 -- y <- wrapDST =<< many1NoneOf ['-']
136 -- _ <- char '-'
137 -- m <- wrapDST =<< many1NoneOf ['-']
138 -- _ <- char '-'
139 -- d <- wrapDST =<< many1NoneOf ['T']
140 -- _ <- char 'T'
141 -- return $ fromGregorian (toInteger y) m d
142 --
143 ---- | Parser for time format h:m:s
144 --parseTimeOfDay :: Parser TimeOfDay
145 --parseTimeOfDay = do
146 -- h <- wrapDST =<< many1NoneOf [':']
147 -- _ <- char ':'
148 -- m <- wrapDST =<< many1NoneOf [':']
149 -- _ <- char ':'
150 -- r <- many1NoneOf ['.']
151 -- _ <- char '.'
152 -- dec <- many1NoneOf ['+', '-']
153 -- let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
154 -- seconds = nb * 10^(12-l)
155 -- return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
156 --
157 --
158 -- | Parser for timezone format +hh:mm
159 --parseTimeZone :: Parser TimeZone
160 --parseTimeZone = do
161 -- sign <- oneOf ['+', '-']
162 -- h <- wrapDST =<< many1NoneOf [':']
163 -- _ <- char ':'
164 -- m <- wrapDST =<< (many1 $ anyChar)
165 -- let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
166 -- in return $ TimeZone timeInMinute False "CET"
167 --
168 ---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
169 --parseZonedTime :: Parser ZonedTime
170 --parseZonedTime= do
171 -- d <- parseGregorian
172 -- tod <- parseTimeOfDay
173 -- tz <- parseTimeZone
174 -- return $ ZonedTime (LocalTime d (tod)) tz
175 --
176 ---- | Opposite of toRFC3339
177 --fromRFC3339 :: Text -> Either ParseError ZonedTime
178 --fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
179 -- where input = unpack t