]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/JSON.hs
corpus/parsers: add gitlab issue parser
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / JSON.hs
1 {-|
2 Module : Gargantext.Core.Text.Corpus.Parsers.JSON
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 JSON parser for Gargantext corpus files.
11
12 -}
13
14 {-# LANGUAGE DuplicateRecordFields #-}
15
16 module Gargantext.Core.Text.Corpus.Parsers.JSON where
17
18 import Conduit
19 import Data.Aeson
20 import qualified Data.ByteString.Lazy as BL
21 import Data.Either (Either(..))
22 import Data.Text
23 import GHC.Generics
24
25 import qualified Prelude
26
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28 -- import Gargantext.Database.Schema.Node (NodePoly(..))
29 import Gargantext.Prelude hiding (length)
30
31
-- | Top-level object of a Gargantext corpus JSON file: the list of
-- documents plus a @garg_version@ text field.
--
-- The 'FromJSON' instance is the generic default, so the record field
-- names are the JSON keys and must not be renamed.
data JSONStruct = JSONStruct
  { documents    :: [JSONStructDocument]
  , garg_version :: Text
  }
  deriving (Generic)

instance FromJSON JSONStruct
37
-- | One element of the 'documents' list: a document record, its ngrams
-- record and a @hash@ text value.
--
-- Decoded with the generic 'FromJSON' default, so field names are the
-- JSON keys.
data JSONStructDocument = JSONStructDocument
  { document :: JSONDocument
  , ngrams   :: JSONNgrams
  , hash     :: Text
  }
  deriving (Generic)

instance FromJSON JSONStructDocument
44
-- | The @document@ object of a corpus entry.
--
-- @hash_id@ and @parent_id@ are typed as 'Maybe'; @date@ is kept as raw
-- 'Text' (no date parsing happens here). The payload of interest to the
-- parser is 'hyperdata'.
--
-- Decoded with the generic 'FromJSON' default, so field names are the
-- JSON keys.
data JSONDocument = JSONDocument
  { id        :: Int
  , hash_id   :: Maybe Text
  , typename  :: Int
  , user_id   :: Int
  , parent_id :: Maybe Int
  , name      :: Text
  , date      :: Text
  , hyperdata :: HyperdataDocument
  }
  deriving (Generic)

instance FromJSON JSONDocument
56
-- | The @ngrams@ object of a corpus entry: a list of ngram texts and a
-- @hash@ text value.
--
-- Decoded with the generic 'FromJSON' default, so field names are the
-- JSON keys.
data JSONNgrams = JSONNgrams
  { ngrams :: [Text]
  , hash   :: Text
  }
  deriving (Generic)

instance FromJSON JSONNgrams
62
63 ------------------------------------------------------------------------
-- | Decode a lazy 'BL.ByteString' into a 'JSONStruct' with aeson's
-- 'eitherDecode'.
--
-- Returns @Left msg@ carrying the decoder's error message when the input
-- cannot be decoded, @Right struct@ otherwise.
--
-- TODO: documents -> document -> hyperdata + title etc
readJSONLazyBS :: BL.ByteString -> Either Prelude.String JSONStruct
readJSONLazyBS = eitherDecode
67
68
-- | Decode a corpus file and expose its documents as a conduit source.
--
-- On success the result pairs the number of decoded documents (always
-- wrapped in 'Just') with a source that yields the 'HyperdataDocument'
-- of each entry, in list order. A decoding failure from 'readJSONLazyBS'
-- is passed through unchanged as @Left msg@.
parseJSONC :: BL.ByteString
           -> Either Prelude.String (Maybe Integer, ConduitT () HyperdataDocument Identity ())
parseJSONC bs = Prelude.fmap countedSource (readJSONLazyBS bs)
  where
    -- Pair the list length with a source streaming the converted entries.
    countedSource JSONStruct { documents = docs } =
      ( Just (Prelude.toInteger (Prelude.length docs))
      , yieldMany docs .| mapC doc2hyperdoc
      )
77
-- | Project the 'HyperdataDocument' out of a corpus entry, discarding the
-- ngrams, hash and remaining document fields.
doc2hyperdoc :: JSONStructDocument -> HyperdataDocument
doc2hyperdoc JSONStructDocument { document = inner } = payload
  where
    JSONDocument { hyperdata = payload } = inner