3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
25 import Gargantext.API.Dev (withDevEnv, runCmdDev)
26 import Gargantext.API.Admin.EnvTypes (DevEnv(..))
27 import Gargantext.API.Node () -- instances
28 import Gargantext.API.Prelude (GargError)
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
32 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
33 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
34 import Gargantext.Database.Admin.Types.Node (CorpusId)
35 import Gargantext.Database.Prelude (Cmd)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
41 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
46 --tt = (Unsupervised EN 6 0 Nothing)
48 format = CsvGargV3 -- CsvHal --WOS
49 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
50 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
52 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
53 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
55 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
56 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
59 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
61 docs <- liftIO ( splitEvery 500
62 <$> take (read limit :: Int)
63 <$> readFile corpusPath
64 :: IO [[GrandDebatReference ]]
66 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
69 withDevEnv iniPath $ \env -> do
70 _ <- if fun == "corpus"
71 then runCmdDev env corpus
72 else pure 0 --(cs "false")
74 _ <- if fun == "corpusCsvHal"
75 then runCmdDev env corpusCsvHal
76 else pure 0 --(cs "false")
78 _ <- if fun == "annuaire"
79 then runCmdDev env annuaire
82 _ <- if corpusType == "csv"
83 then runCmdDev env csvCorpus
84 else if corpusType == "debat"
85 then runCmdDev env debatCorpus
86 else panic "corpusType unknown: try \"csv\" or \"debat\""