3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE Strict #-}
18 import Control.Exception (finally)
20 import Data.Text (Text)
21 import Gargantext.API.Node () -- instances
22 import Gargantext.API.Admin.Settings (withDevEnv, runCmdDev, DevEnv)
23 import Gargantext.API.Prelude (GargError)
24 import Gargantext.Core (Lang(..))
25 import Gargantext.Core.Types.Individu (User(..))
26 import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire, TermType(..))
27 import Gargantext.Database.Query.Table.User (insertUsersDemo)
28 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
29 import Gargantext.Database.Admin.Types.Node (CorpusId)
30 import Gargantext.Database.Prelude (Cmd)
31 import Gargantext.Prelude
32 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
34 import System.Environment (getArgs)
35 import qualified Data.Text as Text
39 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
43 let createUsers :: Cmd GargError Int64
44 createUsers = insertUsersDemo
47 --tt = (Unsupervised EN 6 0 Nothing)
49 format = CsvGargV3 -- CsvHal --WOS
50 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
51 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
53 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
54 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
56 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
57 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
60 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
62 docs <- liftIO ( splitEvery 500
63 <$> take (read limit :: Int)
64 <$> readFile corpusPath
65 :: IO [[GrandDebatReference ]]
67 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
70 withDevEnv iniPath $ \env -> do
71 _ <- if fun == "users"
72 then runCmdDev env createUsers
73 else pure 0 --(cs "false")
75 _ <- if fun == "corpus"
76 then runCmdDev env corpus
77 else pure 0 --(cs "false")
79 _ <- if fun == "corpusCsvHal"
80 then runCmdDev env corpusCsvHal
81 else pure 0 --(cs "false")
83 _ <- if fun == "annuaire"
84 then runCmdDev env annuaire
87 _ <- if corpusType == "csv"
88 then runCmdDev env csvCorpus
89 else if corpusType == "debat"
90 then runCmdDev env debatCorpus
91 else panic "corpusType unknown: try \"csv\" or \"debat\""