3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Command-line binary that imports a corpus (or demo users, or an annuaire) into Gargantext.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
25 import Gargantext.API.Dev (withDevEnv, runCmdDev)
26 import Gargantext.API.Admin.EnvTypes (DevEnv(..))
27 import Gargantext.API.Node () -- instances
28 import Gargantext.API.Prelude (GargError)
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire, TermType(..))
32 import Gargantext.Database.Query.Table.User (insertUsersDemo)
33 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
34 import Gargantext.Database.Admin.Types.Node (CorpusId)
35 import Gargantext.Database.Prelude (Cmd)
36 import Gargantext.Prelude
37 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
-- Binds the six positional command-line arguments:
--   fun        - action selector, compared below against "users", "corpus",
--                "corpusCsvHal" and "annuaire"
--   user       - wrapped in `UserName` for the flow functions
--   name       - passed as `Left (cs name :: Text)` to `flowCorpusFile`
--                (presumably the corpus name — TODO confirm)
--   iniPath    - handed to `withDevEnv`
--   limit      - parsed with `read limit :: Int` by the corpus actions
--   corpusPath - input file path passed to the flow functions
-- NOTE(review): this is a refutable list pattern; it only matches when exactly
-- six arguments are given, and any other count fails the match at runtime
-- without a usage message.
41 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
-- Database command run when `fun == "users"`: it simply delegates to
-- `insertUsersDemo` and yields an `Int64`
-- (presumably the number of inserted rows — TODO confirm).
45 let createUsers :: Cmd GargError Int64
46 createUsers = insertUsersDemo
-- Import actions selected later through the `fun` argument.
-- NOTE(review): `tt` is used by `corpus` and `corpusCsvHal` below, but its
-- binding is not visible in this excerpt; the commented-out line only shows an
-- `Unsupervised` alternative.
49 --tt = (Unsupervised EN 6 0 Nothing)
-- File format used by `corpus`; the trailing comment lists CsvHal and WOS as
-- alternatives.
51 format = CsvGargV3 -- CsvHal --WOS
-- `corpus`: calls `flowCorpusFile` for `user` / `name` on `corpusPath`, using
-- `tt` and `format`, and yields a `CorpusId`.
-- NOTE(review): `read limit :: Int` is partial — a non-numeric `limit` argument
-- fails at runtime instead of producing a readable error.
52 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
53 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
-- `corpusCsvHal`: same call as `corpus`, but with the format fixed to `CsvHal`
-- instead of `format`.
55 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
56 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
-- `annuaire`: calls `flowAnnuaire` on `corpusPath` with the hard-coded name
-- "Annuaire" and `Multi EN`; the `name` and `limit` arguments are not used here.
58 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
59 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
-- `debatCorpus`: reads `corpusPath` as `GrandDebatReference` values, keeps at
-- most `limit` of them (parsed with the partial `read`), groups them with
-- `splitEvery 500`, converts each one with `toHyperdataDocument` and passes the
-- groups to `flowCorpus` with `Multi FR`.
-- NOTE(review): `GrandDebatReference`, `splitEvery` and `flowCorpus` do not
-- appear in the visible import list, so this binding is presumably disabled
-- (commented out) in the full file — confirm before relying on it.
62 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
64 docs <- liftIO ( splitEvery 500
65 <$> take (read limit :: Int)
66 <$> readFile corpusPath
67 :: IO [[GrandDebatReference ]]
69 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
-- Runs the callback with the `env` that `withDevEnv` provides for `iniPath`,
-- then dispatches on `fun`. The checks run one after another and every result
-- is discarded (`_ <-`). In the visible branches a non-matching check yields
-- `pure 0`, so an unrecognised `fun` performs no action and reports no error.
72 withDevEnv iniPath $ \env -> do
-- "users": insert the demo users.
73 _ <- if fun == "users"
74 then runCmdDev env createUsers
75 else pure 0 --(cs "false")
-- "corpus": import `corpusPath` using `format`.
77 _ <- if fun == "corpus"
78 then runCmdDev env corpus
79 else pure 0 --(cs "false")
-- "corpusCsvHal": import `corpusPath` using the `CsvHal` format.
81 _ <- if fun == "corpusCsvHal"
82 then runCmdDev env corpusCsvHal
83 else pure 0 --(cs "false")
-- "annuaire": import `corpusPath` through `flowAnnuaire`.
-- NOTE(review): the `else` branch of this check is not visible in this excerpt.
85 _ <- if fun == "annuaire"
86 then runCmdDev env annuaire
-- NOTE(review): `corpusType` and `csvCorpus` are not bound anywhere in the
-- visible code, so this final dispatch is presumably disabled (commented out)
-- in the full file — confirm before relying on it.
89 _ <- if corpusType == "csv"
90 then runCmdDev env csvCorpus
91 else if corpusType == "debat"
92 then runCmdDev env debatCorpus
93 else panic "corpusType unknown: try \"csv\" or \"debat\""