]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-cli/Main.hs
Fix splitting and show progress
[gargantext.git] / bin / gargantext-cli / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext starter
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main specifications to index a corpus with a term list
11
12 -}
13
14 {-# LANGUAGE DataKinds #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19 {-# LANGUAGE StandaloneDeriving #-}
20 {-# LANGUAGE TypeOperators #-}
21 {-# LANGUAGE Strict #-}
22
23 module Main where
24
25 import qualified Data.Vector as DV
26
27 import Control.Monad (zipWithM)
28 import Control.Monad.IO.Class
29 import Data.Text (Text)
30 import Data.List (cycle)
31 import System.IO (hPutStr, hFlush, stderr)
32 import System.Environment
33 --import Control.Concurrent.Async as CCA (mapConcurrently)
34
35 import Gargantext.Prelude
36 import Gargantext.Text.Context
37 import Gargantext.Text.Terms
38 import Gargantext.Text.Terms.WithList
39 import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract)
40 import Gargantext.Text.List.CSV (csvGraphTermList)
41 import Gargantext.Text.Terms (terms)
42 import Gargantext.Text.Metrics.Count (cooc)
43
44 mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b]
45 mapMP f xs = do
46 bs <- zipWithM g (cycle "-\\|/") xs
47 liftIO $ hPutStr stderr "\rDone\n"
48 pure bs
49 where
50 g c x = do
51 liftIO $ hPutStr stderr ['\r',c]
52 liftIO $ hFlush stderr
53 f x
54
55 main :: IO ()
56 main = do
57 [corpusFile, termListFile, outputFile] <- getArgs
58
59 -- corpus :: [Text]
60 corpus <- DV.toList <$> map (\n -> (csv_title n) <> " " <> (csv_abstract n))
61 <$> snd
62 <$> readCsv corpusFile
63
64 putStrLn $ show $ length corpus
65 -- termListMap :: [Text]
66 termList <- csvGraphTermList termListFile
67
68 putStrLn $ show $ length termList
69
70 let patterns = WithList $ buildPatterns termList
71 corpusIndexed <- mapMP (terms patterns) corpus
72 mapM (putStrLn . show) corpusIndexed
73 let myCooc = cooc corpusIndexed
74
75 putStrLn $ show myCooc
76 --writeFile outputFile cooc