3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Maybe (Maybe(..))
21 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
24 import Text.Read (readMaybe)
26 import Gargantext.API.Dev (withDevEnv, runCmdDev)
27 import Gargantext.API.Admin.EnvTypes (DevEnv(..))
28 import Gargantext.API.Node () -- instances
29 import Gargantext.API.Prelude (GargError)
30 import Gargantext.Core (Lang(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
35 import Gargantext.Database.Admin.Types.Node (CorpusId)
36 import Gargantext.Database.Prelude (Cmd)
37 import Gargantext.Prelude
38 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
42 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
47 --tt = (Unsupervised EN 6 0 Nothing)
49 format = CsvGargV3 -- CsvHal --WOS
50 limit' = case (readMaybe limit :: Maybe Int) of
51 Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
53 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
54 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format corpusPath Nothing (\_ -> pure ())
56 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
57 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal corpusPath Nothing (\_ -> pure ())
59 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
60 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ())
63 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
65 docs <- liftIO ( splitEvery 500
66 <$> take (read limit :: Int)
67 <$> readFile corpusPath
68 :: IO [[GrandDebatReference ]]
70 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
73 withDevEnv iniPath $ \env -> do
74 _ <- if fun == "corpus"
75 then runCmdDev env corpus
76 else pure 0 --(cs "false")
78 _ <- if fun == "corpusCsvHal"
79 then runCmdDev env corpusCsvHal
80 else pure 0 --(cs "false")
82 _ <- if fun == "annuaire"
83 then runCmdDev env annuaire
86 _ <- if corpusType == "csv"
87 then runCmdDev env csvCorpus
88 else if corpusType == "debat"
89 then runCmdDev env debatCorpus
90 else panic "corpusType unknown: try \"csv\" or \"debat\""