]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-import/Main.hs
new strategy for choosing temporal matching candidates
[gargantext.git] / bin / gargantext-import / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext Import Corpus
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Import a corpus binary.
11
12 -}
13
14 {-# LANGUAGE Strict #-}
15
16 module Main where
17
18 import Control.Exception (finally)
19 import Data.Either
20 import Data.Maybe (Maybe(..))
21 import Data.Text (Text)
22 import System.Environment (getArgs)
23 import qualified Data.Text as Text
24 import Text.Read (readMaybe)
25
26 import Gargantext.API.Dev (withDevEnv, runCmdDev)
27 import Gargantext.API.Admin.EnvTypes (DevEnv(..))
28 import Gargantext.API.Node () -- instances
29 import Gargantext.API.Prelude (GargError)
30 import Gargantext.Core (Lang(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
35 import Gargantext.Database.Admin.Types.Node (CorpusId)
36 import Gargantext.Database.Prelude (Cmd)
37 import Gargantext.Prelude
38 import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..))
39
40 main :: IO ()
41 main = do
42 [fun, user, name, iniPath, limit, corpusPath] <- getArgs
43
44 --{-
45
46 let
47 --tt = (Unsupervised EN 6 0 Nothing)
48 tt = (Multi EN)
49 format = CsvGargV3 -- CsvHal --WOS
50 limit' = case (readMaybe limit :: Maybe Int) of
51 Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
52 Just l -> l
53 corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
54 corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format corpusPath Nothing (\_ -> pure ())
55
56 corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
57 corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal corpusPath Nothing (\_ -> pure ())
58
59 annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
60 annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ())
61
62 {-
63 let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
64 debatCorpus = do
65 docs <- liftIO ( splitEvery 500
66 <$> take (read limit :: Int)
67 <$> readFile corpusPath
68 :: IO [[GrandDebatReference ]]
69 )
70 flowCorpus (Text.pack user) (Text.pack name) (Multi FR) (map (map toHyperdataDocument) docs)
71 --}
72
73 withDevEnv iniPath $ \env -> do
74 _ <- if fun == "corpus"
75 then runCmdDev env corpus
76 else pure 0 --(cs "false")
77
78 _ <- if fun == "corpusCsvHal"
79 then runCmdDev env corpusCsvHal
80 else pure 0 --(cs "false")
81
82 _ <- if fun == "annuaire"
83 then runCmdDev env annuaire
84 else pure 0
85 {-
86 _ <- if corpusType == "csv"
87 then runCmdDev env csvCorpus
88 else if corpusType == "debat"
89 then runCmdDev env debatCorpus
90 else panic "corpusType unknown: try \"csv\" or \"debat\""
91 -}
92 pure ()