3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Import a corpus binary.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16 {-# LANGUAGE FlexibleContexts #-}
17 {-# LANGUAGE RankNTypes #-}
18 {-# LANGUAGE Strict #-}
22 import Control.Exception (finally)
24 import Data.Text (Text)
25 import Gargantext.API.Node () -- instances
26 import Gargantext.API.Admin.Settings (withDevEnv, runCmdDev, DevEnv)
27 import Gargantext.API.Prelude (GargError)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Types.Individu (User(..))
30 import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire, TermType(..))
31 import Gargantext.Database.Query.Table.User (insertUsersDemo)
32 import Gargantext.Database.Admin.Types.Node (CorpusId, toHyperdataDocument)
33 import Gargantext.Database.Prelude (Cmd, )
34 import Gargantext.Prelude
35 import Gargantext.Text.Corpus.Parsers (FileFormat(..))
37 import System.Environment (getArgs)
38 import qualified Data.Text as Text
42 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
46 let createUsers :: Cmd GargError Int64
47 createUsers = insertUsersDemo
50 --tt = (Unsupervised EN 6 0 Nothing)
52 format = CsvGargV3 -- CsvHal --WOS
53 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
54 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
56 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
57 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
59 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
60 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
64 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
66 docs <- liftIO ( splitEvery 500
67 <$> take (read limit :: Int)
68 <$> readFile corpusPath
69 :: IO [[GrandDebatReference ]]
71 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
74 withDevEnv iniPath $ \env -> do
75 _ <- if fun == "users"
76 then runCmdDev env createUsers
77 else pure 0 --(cs "false")
79 _ <- if fun == "corpus"
80 then runCmdDev env corpus
81 else pure 0 --(cs "false")
83 _ <- if fun == "corpusCsvHal"
84 then runCmdDev env corpusCsvHal
85 else pure 0 --(cs "false")
88 _ <- if fun == "annuaire"
89 then runCmdDev env annuaire
92 _ <- if corpusType == "csv"
93 then runCmdDev env csvCorpus
94 else if corpusType == "debat"
95 then runCmdDev env debatCorpus
96 else panic "corpusType unknown: try \"csv\" or \"debat\""