2 Module : Gargantext.Core.Text.Corpus.Parsers.GrandDebat
3 Description : Grand Debat Types
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
-- Example corpus flow (fragment): several lines of this definition are not
-- visible here, so only the visible steps are described.
-- The expression handed to liftBase starts with `splitEvery 500` and is
-- annotated as IO [[GD.GrandDebatReference]]; each reference in each inner
-- list is then converted with toHyperdataDocument and the result is passed to
-- flowCorpus together with `u`, `n` and `Multi FR`.
-- NOTE(review): this sits before the `module` line and uses a `GD.` qualifier,
-- so it is presumably sample code kept in the header comment — confirm.
10 _flowCorpusDebat :: FlowCmdM env err m
11 => User -> Either CorpusName [CorpusId]
14 _flowCorpusDebat u n l fp = do
15 docs <- liftBase ( splitEvery 500
18 :: IO [[GD.GrandDebatReference ]]
20 flowCorpus u n (Multi FR) (map (map toHyperdataDocument) docs)
26 module Gargantext.Core.Text.Corpus.Parsers.GrandDebat
29 import Data.Aeson (ToJSON, FromJSON)
30 import Data.Text (Text)
31 import GHC.Generics (Generic)
32 import Gargantext.Core (Lang(..))
33 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
34 import Gargantext.Prelude
35 import Gargantext.Database.GargDB
36 import qualified Data.ByteString.Lazy as DBL
37 import qualified Data.JsonStream.Parser as P
38 import qualified Data.Text as Text
-- | One Grand Debat reference record; it has 'FromJSON' and 'ToJSON'
-- instances and is the element type read by the 'ReadFile' instance.
--
-- Every field visible here is a strict (@!@), optional ('Maybe') value.
-- Some fields of the record are not visible in this view.
40 data GrandDebatReference = GrandDebatReference
42 , reference :: !(Maybe Text)
43 , title :: !(Maybe Text)
45 , createdAt :: !(Maybe Text)
46 , publishedAt :: !(Maybe Text)
47 , updatedAt :: !(Maybe Text)
49 , trashed :: !(Maybe Bool)
50 , trashedStatus :: !(Maybe Text)
52 , authorId :: !(Maybe Text)
53 , authorType :: !(Maybe Text)
54 , authorZipCode :: !(Maybe Text)
-- Nested answers; these are what the ToHyperdataDocument instance turns into
-- the document abstract.
56 , responses :: !(Maybe [GrandDebatResponse])
58 deriving (Show, Generic)
-- | One answer attached to a 'GrandDebatReference' (see its @responses@
-- field). All four fields are strict, optional 'Text' values.
--
-- Field order matters: @toSentence@ pattern-matches this constructor
-- positionally and reads the fourth field ('formattedValue').
61 data GrandDebatResponse = GrandDebatResponse
62 { questionId :: !(Maybe Text)
63 , questionTitle :: !(Maybe Text)
64 , value :: !(Maybe Text)
65 , formattedValue :: !(Maybe Text)
67 deriving (Show, Generic)
-- JSON decoding instances. No methods are defined, so aeson's default
-- implementations are used; those defaults go through the 'Generic' instances
-- derived on both types.
-- NOTE(review): with the default options the JSON keys are presumably the
-- record field names unchanged — confirm against the input files.
69 instance FromJSON GrandDebatResponse
70 instance FromJSON GrandDebatReference
-- JSON encoding instances, likewise relying on the class defaults.
72 instance ToJSON GrandDebatResponse
73 instance ToJSON GrandDebatReference
-- | Conversion of a 'GrandDebatReference' into a 'HyperdataDocument'.
-- Parts of this instance are not visible in this view; the comments below
-- only describe the visible lines.
76 instance ToHyperdataDocument GrandDebatReference
78 toHyperdataDocument (GrandDebatReference { id, title, publishedAt, authorType, authorZipCode, responses }) =
-- Fixed value for every document built by this instance.
79 HyperdataDocument { _hd_bdd = Just "GrandDebat"
82 , _hd_uniqId = Nothing
83 , _hd_uniqIdBdd = Nothing
-- authorType feeds both the authors and the institutes fields.
86 , _hd_authors = authorType
87 , _hd_institutes = authorType
88 , _hd_source = authorZipCode
-- The abstract is built from the responses (when present) by toAbstract.
89 , _hd_abstract = toAbstract <$> responses
-- publishedAt is passed through unchanged; the split date components are all
-- left as Nothing here.
90 , _hd_publication_date = publishedAt
91 , _hd_publication_year = Nothing
92 , _hd_publication_month = Nothing
93 , _hd_publication_day = Nothing
94 , _hd_publication_hour = Nothing
95 , _hd_publication_minute = Nothing
96 , _hd_publication_second = Nothing
-- Language is always the textual form of the FR constructor.
97 , _hd_language_iso2 = Just $ Text.pack $ show FR }
-- toAbstract: turn each response into a sentence with toSentence, drop the
-- empty ones, and join the rest with " . ".
99 toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
-- toSentence: positional match that ignores the first three fields and
-- inspects only the fourth (formattedValue, bound to r).
100 toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
-- When a value is present, its length is compared against 10; the results of
-- the branches are not visible in this view.
102 Just r' -> case Text.length r' > 10 of
-- | File reader for a list of 'GrandDebatReference'.
106 instance ReadFile [GrandDebatReference]
108 -- | read json: the 3 versions below all work, listed with increasing optimization
109 --readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
110 --readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
-- Active version: read the file as a lazy ByteString and run the streaming
-- parser `P.arrayOf P.value` over it with P.parseLazyByteString.
-- NOTE(review): parse failures are not handled at this call site — check how
-- parseLazyByteString reports malformed input.
111 readFile' fp = P.parseLazyByteString (P.arrayOf P.value) <$> DBL.readFile fp