]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/Gitlab.hs
corpus/parsers: add gitlab issue parser
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / Gitlab.hs
1 module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
2 Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
3 ) where
4
import Data.Aeson
import Data.Either (Either(..))
import Data.Time
import Prelude (truncate, (<>))
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as DBL
import System.FilePath (FilePath)
import qualified System.IO as SysIO

import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
14
-- | A single GitLab issue as read from the JSON export.
--
-- All fields are strict.
--
-- NOTE(review): the creation time is a 'LocalTime' while the close time is a
-- 'UTCTime'; confirm against the export format that this asymmetry is
-- intended.
data Issue = Issue
  { _issue_id      :: !Int
    -- ^ Identifier of the issue.
  , _issue_title   :: !DT.Text
    -- ^ Title of the issue.
  , _issue_content :: !DT.Text
    -- ^ Body text of the issue.
  , _issue_created :: !LocalTime
    -- ^ Creation time.
  , _issue_closed  :: !(Maybe UTCTime)
    -- ^ Close time, if any.
  }
  deriving (Show)
22
-- | Decode an 'Issue' from a JSON object whose fields are stored under the
-- positional keys @c0@ .. @c4@.
--
-- Keys @c0@ to @c3@ are looked up with '.:' and are therefore mandatory;
-- @c4@ is looked up with '.:?' and is optional.
instance FromJSON Issue where
  parseJSON = withObject "Issue" $ \obj -> do
    issueId   <- obj .:  "c0" -- id
    title     <- obj .:  "c1" -- title
    content   <- obj .:  "c2" -- content
    createdAt <- obj .:  "c3" -- creation time
    closedAt  <- obj .:? "c4" -- close time
    pure (Issue issueId title content createdAt closedAt)
30
-- | Convert a GitLab 'Issue' into a 'HyperdataDocument'.
--
-- The issue title becomes the document title and the issue content becomes
-- the abstract. Every publication date field is derived from the issue's
-- creation time ('_issue_created'). The language is always reported as 'EN';
-- all remaining fields are left as 'Nothing'.
gitlabIssue2hyperdataDocument :: Issue -> HyperdataDocument
gitlabIssue2hyperdataDocument issue = HyperdataDocument
  { _hd_bdd = Nothing
  , _hd_doi = Nothing
  , _hd_url = Nothing
  , _hd_uniqId = Nothing
  , _hd_uniqIdBdd = Nothing
  , _hd_page = Nothing
  , _hd_title = Just (_issue_title issue)
  , _hd_authors = Nothing
  , _hd_institutes = Nothing
  , _hd_source = Nothing
  , _hd_abstract = Just (_issue_content issue)
  , _hd_publication_date = Just $ DT.pack $ show date
  , _hd_publication_year = Just $ fromIntegral year
  , _hd_publication_month = Just month
  , _hd_publication_day = Just day
  , _hd_publication_hour = Just (todHour tod)
  , _hd_publication_minute = Just (todMin tod)
    -- Drop the fractional part instead of rounding: rounding 59.5s or more
    -- would yield second = 60 while the minute field stays unchanged.
  , _hd_publication_second = Just (truncate $ todSec tod)
  , _hd_language_iso2 = Just $ (DT.pack . show) lang
  }
  where
    -- The language is hard-coded to English.
    lang = EN
    date = _issue_created issue
    (year, month, day) = toGregorian $ localDay date
    tod = localTimeOfDay date
57
-- | Read a JSON file containing a list of GitLab issues.
--
-- The file is expected to hold a JSON value that decodes to @['Issue']@.
-- When decoding fails the function still returns an empty list, but the
-- decoder's error message is written to stderr so that a malformed file can
-- be told apart from a file that really contains no issues.
readFile_Issues :: FilePath -> IO [Issue]
readFile_Issues fp = do
  raw <- DBL.readFile fp
  case eitherDecode raw of
    Right issues -> pure issues
    Left err -> do
      -- Best effort: report the problem, then fall back to "no issues".
      SysIO.hPutStrLn SysIO.stderr
        ("readFile_Issues: cannot decode " <> fp <> ": " <> err)
      pure []
65
-- | Read a JSON file of GitLab issues with 'readFile_Issues' and convert each
-- issue to a 'HyperdataDocument' with 'gitlabIssue2hyperdataDocument'.
readFile_IssuesAsDocs :: FilePath -> IO [HyperdataDocument]
readFile_IssuesAsDocs fp = do
  issues <- readFile_Issues fp
  pure (fmap gitlabIssue2hyperdataDocument issues)