3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
25 import Gargantext.API.Dev (withDevEnv, runCmdDev)
26 import Gargantext.API.Admin.EnvTypes (DevEnv(..))
27 import Gargantext.API.Node () -- instances
28 import Gargantext.API.Prelude (GargError)
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire, TermType(..))
32 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
33 import Gargantext.Database.Admin.Types.Node (CorpusId)
34 import Gargantext.Database.Prelude (Cmd)
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
40 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
45 --tt = (Unsupervised EN 6 0 Nothing)
47 format = CsvGargV3 -- CsvHal --WOS
48 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
49 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
51 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
52 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
54 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
55 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
58 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
60 docs <- liftIO ( splitEvery 500
61 <$> take (read limit :: Int)
62 <$> readFile corpusPath
63 :: IO [[GrandDebatReference ]]
65 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
68 withDevEnv iniPath $ \env -> do
69 _ <- if fun == "corpus"
70 then runCmdDev env corpus
71 else pure 0 --(cs "false")
73 _ <- if fun == "corpusCsvHal"
74 then runCmdDev env corpusCsvHal
75 else pure 0 --(cs "false")
77 _ <- if fun == "annuaire"
78 then runCmdDev env annuaire
81 _ <- if corpusType == "csv"
82 then runCmdDev env csvCorpus
83 else if corpusType == "debat"
84 then runCmdDev env debatCorpus
85 else panic "corpusType unknown: try \"csv\" or \"debat\""