3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Text (Text)
21 import Gargantext.API.Node () -- instances
22 import Gargantext.API.Admin.Settings (withDevEnv, runCmdDev, DevEnv)
23 import Gargantext.API.Prelude (GargError)
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Core.Types.Individu (User(..))
26 import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire, TermType(..))
27 import Gargantext.Database.Query.Table.User (insertUsersDemo)
28 import Gargantext.Database.Admin.Types.Node (CorpusId, toHyperdataDocument)
29 import Gargantext.Database.Prelude (Cmd, )
30 import Gargantext.Prelude
31 import Gargantext.Text.Corpus.Parsers (FileFormat(..))
33 import System.Environment (getArgs)
34 import qualified Data.Text as Text
38 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
42 let createUsers :: Cmd GargError Int64
43 createUsers = insertUsersDemo
46 --tt = (Unsupervised EN 6 0 Nothing)
48 format = CsvGargV3 -- CsvHal --WOS
49 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
50 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
52 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
53 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
55 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
56 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
59 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
61 docs <- liftIO ( splitEvery 500
62 <$> take (read limit :: Int)
63 <$> readFile corpusPath
64 :: IO [[GrandDebatReference ]]
66 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
69 withDevEnv iniPath $ \env -> do
70 _ <- if fun == "users"
71 then runCmdDev env createUsers
72 else pure 0 --(cs "false")
74 _ <- if fun == "corpus"
75 then runCmdDev env corpus
76 else pure 0 --(cs "false")
78 _ <- if fun == "corpusCsvHal"
79 then runCmdDev env corpusCsvHal
80 else pure 0 --(cs "false")
82 _ <- if fun == "annuaire"
83 then runCmdDev env annuaire
86 _ <- if corpusType == "csv"
87 then runCmdDev env csvCorpus
88 else if corpusType == "debat"
89 then runCmdDev env debatCorpus
90 else panic "corpusType unknown: try \"csv\" or \"debat\""