3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Maybe (Maybe(..))
21 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
24 import Text.Read (readMaybe)
26 import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
27 import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
28 import Gargantext.API.Admin.Orchestrator.Types (JobLog)
29 import Gargantext.API.Node () -- instances
30 import Gargantext.API.Prelude (GargError)
31 import Gargantext.Core (Lang(..))
32 import Gargantext.Core.Types.Individu (User(..))
33 import Gargantext.Core.Types.Query (Limit)
34 import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
37 import Gargantext.Database.Admin.Types.Node (CorpusId)
38 import Gargantext.Database.Prelude (Cmd)
39 import Gargantext.Prelude
40 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
41 import Gargantext.Utils.Jobs (MonadJobStatus, JobHandle)
45 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
50 --tt = (Unsupervised EN 6 0 Nothing)
52 format = CsvGargV3 -- CsvHal --WOS
53 limit' = case (readMaybe limit :: Maybe Limit) of
54 Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
56 corpus :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
57 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format Plain corpusPath Nothing DevJobHandle
59 corpusCsvHal :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
60 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal Plain corpusPath Nothing DevJobHandle
62 annuaire :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
63 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath DevJobHandle
66 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
68 docs <- liftIO ( splitEvery 500
69 <$> take (read limit :: Int)
70 <$> readFile corpusPath
71 :: IO [[GrandDebatReference ]]
73 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
76 withDevEnv iniPath $ \env -> do
77 _ <- if fun == "corpus"
78 then runCmdGargDev env corpus
79 else pure 0 --(cs "false")
81 _ <- if fun == "corpusCsvHal"
82 then runCmdGargDev env corpusCsvHal
83 else pure 0 --(cs "false")
85 _ <- if fun == "annuaire"
86 then runCmdGargDev env annuaire
89 _ <- if corpusType == "csv"
90 then runCmdDev env csvCorpus
91 else if corpusType == "debat"
92 then runCmdDev env debatCorpus
93 else panic "corpusType unknown: try \"csv\" or \"debat\""