3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE RankNTypes #-}
18 {-# LANGUAGE Strict #-}
24 import Control.Exception (finally)
25 import Gargantext.Prelude
26 import Gargantext.Database.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire)
27 import Gargantext.Text.Corpus.Parsers (FileFormat(..))
28 import Gargantext.Database.Utils (Cmd, )
29 import Gargantext.Database.Types.Node (CorpusId, toHyperdataDocument)
30 import Gargantext.Database.Schema.User (insertUsersDemo)
31 import Gargantext.Text.Terms (TermType(..))
32 import Gargantext.Core (Lang(..))
33 import Gargantext.API.Types (GargError)
34 import Gargantext.API.Node () -- instances
35 import Gargantext.API.Settings (withDevEnv, runCmdDev, DevEnv)
36 import System.Environment (getArgs)
37 --import Gargantext.Text.Corpus.Parsers.GrandDebat (readFile, GrandDebatReference(..))
38 import Data.Text (Text)
39 import qualified Data.Text as Text
40 import Control.Monad.IO.Class (liftIO)
44 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
48 let createUsers :: Cmd GargError Int64
49 createUsers = insertUsersDemo
52 --tt = (Unsupervised EN 6 0 Nothing)
54 format = CsvGargV3 -- CsvHalFormat --WOS
55 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
56 corpus = flowCorpusFile (cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
58 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
59 corpusCsvHal = flowCorpusFile (cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHalFormat corpusPath
61 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
62 annuaire = flowAnnuaire (cs user) (Left "Annuaire") (Multi EN) corpusPath
66 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
68 docs <- liftIO ( splitEvery 500
69 <$> take (read limit :: Int)
70 <$> readFile corpusPath
71 :: IO [[GrandDebatReference ]]
73 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
76 withDevEnv iniPath $ \env -> do
77 _ <- if fun == "users"
78 then runCmdDev env createUsers
79 else pure 0 --(cs "false")
81 _ <- if fun == "corpus"
82 then runCmdDev env corpus
83 else pure 0 --(cs "false")
85 _ <- if fun == "corpusCsvHal"
86 then runCmdDev env corpusCsvHal
87 else pure 0 --(cs "false")
90 _ <- if fun == "annuaire"
91 then runCmdDev env annuaire
94 _ <- if corpusType == "csv"
95 then runCmdDev env csvCorpus
96 else if corpusType == "debat"
97 then runCmdDev env debatCorpus
98 else panic "corpusType unknown: try \"csv\" or \"debat\""