]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Parsers/Date.hs
[PARSERS] RIS/PRESSE fix title and abstract field.
[gargantext.git] / src / Gargantext / Text / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Text.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDateRaw returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Text.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE FlexibleContexts #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where
22
23 import Data.HashMap.Strict as HM hiding (map)
24 import Data.Text (Text, unpack, splitOn, pack)
25 import Data.Time (parseTimeOrError, defaultTimeLocale)
26 import Data.Time.Clock (UTCTime, getCurrentTime)
27 import Data.Time.LocalTime (utc)
28 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
29 import Duckling.Api (analyze)
30 import Duckling.Core (makeLocale, Some(This), Dimension(Time))
31 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime))
32 import Duckling.Types (ResolvedToken)
33 import Duckling.Types (jsonValue)
34 import Gargantext.Core (Lang(FR,EN))
35 import Gargantext.Prelude
36 import qualified Data.Aeson as Json
37 import qualified Data.HashSet as HashSet
38 import qualified Duckling.Core as DC
39
40 ------------------------------------------------------------------------
41 -- | Date Parser
42 -- Parses dates mentions in full text given the language.
43 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
44 -- 1979-04-10 19:00:00 UTC
45 -- >>> parseDate EN (pack "April 10 1979")
46 -- 1979-04-10 00:00:00 UTC
47 parseDate :: Lang -> Text -> IO UTCTime
48 parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
49
50 type DateFormat = Text
51 type DateDefault = Text
52
53 parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
54 parseDate' format def lang s = do
55 dateStr' <- parseDateRaw lang s
56 let dateStr = unpack $ maybe def identity
57 $ head $ splitOn "." dateStr'
58 pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
59
60
61 -- TODO add Paris at Duckling.Locale Region datatype
62 -- | To get Homogeinity of the languages
63 -- TODO : put this in a more generic place in the source code
64 parserLang :: Lang -> DC.Lang
65 parserLang FR = DC.FR
66 parserLang EN = DC.EN
67 -- parserLang _ = panic "not implemented"
68
69 -- | Final Date parser API
70 -- IO can be avoided here:
71 -- currentContext :: Lang -> IO Context
72 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
73 -- parseDateRaw :: Context -> Text -> SomeErrorHandling Text
74
75 -- TODO error handling
76 parseDateRaw :: Lang -> Text -> IO (Text)
77 parseDateRaw lang text = do
78 maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
79 case headMay maybeJson of
80 Just (Json.Object object) -> case HM.lookup "value" object of
81 Just (Json.String date) -> pure date
82 Just _ -> panic "ParseDateRaw ERROR: should be a json String"
83 Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
84
85 _ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text
86
87
88 -- | Current Time in DucklingTime format
89 -- TODO : get local Time in a more generic way
90 utcToDucklingTime :: UTCTime -> DucklingTime
91 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
92
93 -- | Local Context which depends on Lang and Time
94 localContext :: Lang -> DucklingTime -> Context
95 localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
96
97 -- | Date parser with Duckling
98 parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
99 parseDateWithDuckling lang input = do
100 contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
101 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
102 pure $ analyze input contxt $ HashSet.fromList [(This Time)]
103