]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-import/Main.hs
[VERSION] +1 to 0.0.6.9.8.6.1
[gargantext.git] / bin / gargantext-import / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Import a corpus binary.
11
12 -}
13
14 {-# LANGUAGE Strict #-}
15
16 module Main where
17
18 import Control.Exception (finally)
19 import Data.Either
20 import Data.Maybe (Maybe(..))
21 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
24 import Text.Read (readMaybe)
25
26 import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
27 import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
28 import Gargantext.API.Admin.Orchestrator.Types (JobLog)
29 import Gargantext.API.Node () -- instances
30 import Gargantext.API.Prelude (GargError)
31 import Gargantext.Core (Lang(..))
32 import Gargantext.Core.Types.Individu (User(..))
33 import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
34 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
35 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
36 import Gargantext.Database.Admin.Types.Node (CorpusId)
37 import Gargantext.Database.Prelude (Cmd)
38 import Gargantext.Prelude
39 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
40 import Gargantext.Utils.Jobs (MonadJobStatus, JobHandle)
41
42 main :: IO ()
43 main = do
44 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
45
46 --{-
47
48 let
49 --tt = (Unsupervised EN 6 0 Nothing)
50 tt = (Multi EN)
51 format = CsvGargV3 -- CsvHal --WOS
52 limit' = case (readMaybe limit :: Maybe Int) of
53 Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
54 Just l -> l
55 corpus :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
56 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format Plain corpusPath Nothing DevJobHandle
57
58 corpusCsvHal :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
59 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal Plain corpusPath Nothing DevJobHandle
60
61 annuaire :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
62 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath DevJobHandle
63
64 {-
65 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
66 debatCorpus = do
67 docs <- liftIO ( splitEvery 500
68 <$> take (read limit :: Int)
69 <$> readFile corpusPath
70 :: IO [[GrandDebatReference ]]
71 )
72 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
73 --}
74
75 withDevEnv iniPath $ \env -> do
76 _ <- if fun == "corpus"
77 then runCmdGargDev env corpus
78 else pure 0 --(cs "false")
79
80 _ <- if fun == "corpusCsvHal"
81 then runCmdGargDev env corpusCsvHal
82 else pure 0 --(cs "false")
83
84 _ <- if fun == "annuaire"
85 then runCmdGargDev env annuaire
86 else pure 0
87 {-
88 _ <- if corpusType == "csv"
89 then runCmdDev env csvCorpus
90 else if corpusType == "debat"
91 then runCmdDev env debatCorpus
92 else panic "corpusType unknown: try \"csv\" or \"debat\""
93 -}
94 pure ()