]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
Merge branch 'dev' into 97-dev-istex-search
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / Date.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.Parsers.Date
3 Description : Some utils to parse dates
4 Copyright : (c) CNRS 2017-present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 According to the language of the text, parseDateRaw returns date as Text:
11
12 TODO : Add some tests
13 import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
14 DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
15 -}
16
17 {-# LANGUAGE TypeFamilies #-}
18
19 module Gargantext.Core.Text.Corpus.Parsers.Date
20 {-(parse, parseRaw, dateSplit, Year, Month, Day)-}
21 where
22
23 import Data.Aeson (toJSON, Value)
24 import Data.Either (Either(..))
25 import Data.HashMap.Strict as HM hiding (map)
26 import Data.Text (Text, unpack, splitOn)
27 import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
28 import qualified Data.Time.Calendar as DTC
29 import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
30 import Data.Time.LocalTime (utc)
31 import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
32 import Duckling.Api (analyze)
33 import Duckling.Core (makeLocale, Dimension(Time))
34 import Duckling.Types (Seal(..))
35 import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
36 import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
37 import Gargantext.Core (Lang(FR,EN))
38 import Gargantext.Prelude
39 import qualified Data.Aeson as Json
40 import qualified Data.HashSet as HashSet
41 import qualified Duckling.Core as DC
42
43 ------------------------------------------------------------------------
44 -- | Parse date to Ints
45 -- TODO add hours, minutes and seconds
46 dateSplit :: Lang -> Maybe Text -> (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
47 dateSplit _ Nothing = (Nothing, (Nothing, Nothing, Nothing))
48 dateSplit l (Just txt) = do
49 let utcTime = parse l txt
50 let (y, m, d) = split' utcTime
51 (Just utcTime, (Just y, Just m,Just d))
52
53 split' :: UTCTime -> (Year, Month, Day)
54 split' utcTime = (fromIntegral y, m, d)
55 where
56 (UTCTime day _) = utcTime
57 (y,m,d) = toGregorian day
58
59 type Year = Int
60 type Month = Int
61 type Day = Int
62 ------------------------------------------------------------------------
63
64 -- | Date Parser
65 -- Parses dates mentions in full text given the language.
66 -- >>> parseDate FR (pack "10 avril 1979 à 19H")
67 -- 1979-04-10 19:00:00 UTC
68 -- >>> parseDate EN (pack "April 10 1979")
69 -- 1979-04-10 00:00:00 UTC
70 parse :: Lang -> Text -> UTCTime
71 parse = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0"
72
73 type DateFormat = Text
74 type DateDefault = Text
75
76 parseDate' :: DateFormat
77 -> DateDefault
78 -> Lang
79 -> Text
80 -> UTCTime
81 parseDate' format def lang s = do
82 let dateStr' = parseRaw lang s
83 case dateStr' of
84 Left _err -> defaultUTCTime
85 Right "" -> defaultUTCTime
86 Right ds -> do
87 let dateStr = unpack
88 $ maybe def identity
89 $ head
90 $ splitOn "." ds
91 parseTimeOrError True defaultTimeLocale (unpack format) dateStr
92
93
94 -- TODO add Paris at Duckling.Locale Region datatype
95 -- | To get Homogeinity of the languages
96 -- TODO : put this in a more generic place in the source code
97 parserLang :: Lang -> DC.Lang
98 parserLang FR = DC.FR
99 parserLang EN = DC.EN
100 parserLang _ = panic "not implemented"
101
102 -- | Final Date parser API
103 -- IO can be avoided here:
104 -- currentContext :: Lang -> IO Context
105 -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
106 -- parseRaw :: Context -> Text -> SomeErrorHandling Text
107
108 parseRaw :: Lang -> Text -> Either Text Text
109 parseRaw lang text = do -- case result
110 let maybeResult = extractValue $ getTimeValue
111 $ parseDateWithDuckling lang text (Options True)
112 case maybeResult of
113 Just result -> Right result
114 Nothing -> do
115 -- printDebug ("[G.C.T.C.P.D.parseRaw] ERROR " <> (cs . show) lang) text
116 Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> (cs . show) lang <> " :: " <> text
117
118 getTimeValue :: [ResolvedToken] -> Maybe Value
119 getTimeValue rt = case head rt of
120 Nothing -> do
121 Nothing
122 Just x -> case rval x of
123 RVal Time t -> Just $ toJSON t
124 _ -> do
125 Nothing
126
127 extractValue :: Maybe Value -> Maybe Text
128 extractValue (Just (Json.Object object)) =
129 case HM.lookup "value" object of
130 Just (Json.String date) -> Just date
131 _ -> Nothing
132 extractValue _ = Nothing
133
134 -- | Current Time in DucklingTime format
135 -- TODO : get local Time in a more generic way
136 utcToDucklingTime :: UTCTime -> DucklingTime
137 utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
138
139 -- | Local Context which depends on Lang and Time
140 localContext :: Lang -> DucklingTime -> Context
141 localContext lang dt = Context { referenceTime = dt
142 , locale = makeLocale (parserLang lang) Nothing }
143
144 defaultDay :: DTC.Day
145 defaultDay = DTC.fromGregorian 1 1 1
146
147 defaultUTCTime :: UTCTime
148 defaultUTCTime = UTCTime { utctDay = defaultDay
149 , utctDayTime = secondsToDiffTime 0 }
150
151 -- | Date parser with Duckling
152 parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
153 parseDateWithDuckling lang input options = do
154 let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
155 --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
156 -- TODO check/test Options False or True
157 analyze input contxt options $ HashSet.fromList [(Seal Time)]
158