]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-import/Main.hs
[ngrams] remove materialized view, some refactorings
[gargantext.git] / bin / gargantext-import / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Import a corpus binary.
11
12 -}
13
14 {-# LANGUAGE Strict #-}
15
16 module Main where
17
18 import Control.Exception (finally)
19 import Data.Either
20 import Data.Maybe (Maybe(..))
21 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
24 import Text.Read (readMaybe)
25
26 import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
27 import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
28 import Gargantext.API.Admin.Orchestrator.Types (JobLog)
29 import Gargantext.API.Node () -- instances
30 import Gargantext.API.Prelude (GargError)
31 import Gargantext.Core (Lang(..))
32 import Gargantext.Core.Types.Individu (User(..))
33 import Gargantext.Core.Types.Query (Limit)
34 import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
37 import Gargantext.Database.Admin.Types.Node (CorpusId)
38 import Gargantext.Database.Prelude (Cmd)
39 import Gargantext.Prelude
40 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
41 import Gargantext.Utils.Jobs (MonadJobStatus, JobHandle)
42
43 main :: IO ()
44 main = do
45 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
46
47 --{-
48
49 let
50 --tt = (Unsupervised EN 6 0 Nothing)
51 tt = (Multi EN)
52 format = CsvGargV3 -- CsvHal --WOS
53 limit' = case (readMaybe limit :: Maybe Limit) of
54 Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
55 Just l -> l
56 corpus :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
57 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format Plain corpusPath Nothing DevJobHandle
58
59 corpusCsvHal :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
60 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal Plain corpusPath Nothing DevJobHandle
61
62 annuaire :: forall m. (FlowCmdM DevEnv GargError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
63 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath DevJobHandle
64
65 {-
66 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
67 debatCorpus = do
68 docs <- liftIO ( splitEvery 500
69 <$> take (read limit :: Int)
70 <$> readFile corpusPath
71 :: IO [[GrandDebatReference ]]
72 )
73 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
74 --}
75
76 withDevEnv iniPath $ \env -> do
77 _ <- if fun == "corpus"
78 then runCmdGargDev env corpus
79 else pure 0 --(cs "false")
80
81 _ <- if fun == "corpusCsvHal"
82 then runCmdGargDev env corpusCsvHal
83 else pure 0 --(cs "false")
84
85 _ <- if fun == "annuaire"
86 then runCmdGargDev env annuaire
87 else pure 0
88 {-
89 _ <- if corpusType == "csv"
90 then runCmdDev env csvCorpus
91 else if corpusType == "debat"
92 then runCmdDev env debatCorpus
93 else panic "corpusType unknown: try \"csv\" or \"debat\""
94 -}
95 pure ()