2 Module : Gargantext.Core.Text.Corpus.Parsers.JSON
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 JSON parser for Gargantext corpus files.
14 {-# LANGUAGE DuplicateRecordFields #-}
16 module Gargantext.Core.Text.Corpus.Parsers.JSON where
20 import qualified Data.ByteString.Lazy as BL
21 import Data.Either (Either(..))
25 import qualified Prelude
27 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
28 -- import Gargantext.Database.Schema.Node (NodePoly(..))
29 import Gargantext.Prelude hiding (length)
33 JSONStruct { documents :: [ JSONStructDocument ]
34 , garg_version :: Text }
36 instance FromJSON JSONStruct
38 data JSONStructDocument =
39 JSONStructDocument { document :: JSONDocument
40 , ngrams :: JSONNgrams
43 instance FromJSON JSONStructDocument
46 JSONDocument { id :: Int
47 , hash_id :: Maybe Text
50 , parent_id :: Maybe Int
53 , hyperdata :: HyperdataDocument }
55 instance FromJSON JSONDocument
58 JSONNgrams { ngrams :: [Text]
61 instance FromJSON JSONNgrams
63 ------------------------------------------------------------------------
64 -- | TODO: documents -> document -> hyperdata + title etc
65 readJSONLazyBS :: BL.ByteString -> Either Prelude.String JSONStruct
66 readJSONLazyBS bs = eitherDecode bs
69 parseJSONC :: BL.ByteString
70 -> Either Prelude.String (Maybe Integer, ConduitT () HyperdataDocument Identity ())
72 case readJSONLazyBS bs of
74 Right (JSONStruct { documents }) ->
75 Right ( Just $ Prelude.fromIntegral $ Prelude.length documents
76 , yieldMany documents .| mapC doc2hyperdoc )
78 doc2hyperdoc :: JSONStructDocument -> HyperdataDocument
79 doc2hyperdoc (JSONStructDocument { document = JSONDocument { hyperdata } }) = hyperdata