]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-import/Main.hs
[FEAT] MergeWith stem done (before flow integration).
[gargantext.git] / bin / gargantext-import / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Import a corpus binary.
11
12 -}
13
14 {-# LANGUAGE Strict #-}
15
16 module Main where
17
18 import Control.Exception (finally)
19 import Data.Either
20 import Data.Text (Text)
21 import Prelude (read)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
24
25 import Gargantext.API.Dev (withDevEnv, runCmdDev)
26 import Gargantext.API.Admin.EnvTypes (DevEnv(..))
27 import Gargantext.API.Node () -- instances
28 import Gargantext.API.Prelude (GargError)
29 import Gargantext.Core (Lang(..))
30 import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Database.Action.Flow (FlowCmdM, flowCorpusFile, flowAnnuaire, TermType(..))
32 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
33 import Gargantext.Database.Admin.Types.Node (CorpusId)
34 import Gargantext.Database.Prelude (Cmd)
35 import Gargantext.Prelude
36 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
37
38 main :: IO ()
39 main = do
40 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
41
42 --{-
43
44 let
45 --tt = (Unsupervised EN 6 0 Nothing)
46 tt = (Multi EN)
47 format = CsvGargV3 -- CsvHal --WOS
48 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
49 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
50
51 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
52 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath
53
54 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
55 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath
56
57 {-
58 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
59 debatCorpus = do
60 docs <- liftIO ( splitEvery 500
61 <$> take (read limit :: Int)
62 <$> readFile corpusPath
63 :: IO [[GrandDebatReference ]]
64 )
65 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
66 --}
67
68 withDevEnv iniPath $ \env -> do
69 _ <- if fun == "corpus"
70 then runCmdDev env corpus
71 else pure 0 --(cs "false")
72
73 _ <- if fun == "corpusCsvHal"
74 then runCmdDev env corpusCsvHal
75 else pure 0 --(cs "false")
76
77 _ <- if fun == "annuaire"
78 then runCmdDev env annuaire
79 else pure 0
80 {-
81 _ <- if corpusType == "csv"
82 then runCmdDev env csvCorpus
83 else if corpusType == "debat"
84 then runCmdDev env debatCorpus
85 else panic "corpusType unknown: try \"csv\" or \"debat\""
86 -}
87 pure ()