]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-cli/Main.hs
Show years
[gargantext.git] / bin / gargantext-cli / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext starter
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main specifications to index a corpus with a term list
11
12 -}
13
14 {-# LANGUAGE DataKinds #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19 {-# LANGUAGE StandaloneDeriving #-}
20 {-# LANGUAGE TypeOperators #-}
21 {-# LANGUAGE Strict #-}
22
23 module Main where
24
import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe

import Control.Monad (zipWithM)
import Control.Monad.IO.Class

import qualified Data.IntMap as DM

import Data.Map (Map)
import Data.Text (Text)
import Data.List (cycle)
import System.IO (hPutStr, hFlush, stderr)
import System.Environment
import System.Exit (exitFailure)
import Control.Concurrent.Async as CCA (mapConcurrently)

import Gargantext.Prelude
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (coocOn, Coocs)
49
-- | Apply a monadic action to every element of a list, in order, while
-- showing a one-character spinner on stderr.
--
-- Before each element the spinner frame is redrawn (carriage return followed
-- by the next character of @-\\|/@) and stderr is flushed; once all elements
-- have been processed the spinner is overwritten with @Done@.
-- The results are returned in the same order as the inputs.
mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b]
mapMP f xs = do
  results <- zipWithM tick (cycle "-\\|/") xs
  liftIO $ hPutStr stderr "\rDone\n"
  pure results
  where
    -- Draw one spinner frame, then run the user action on the element.
    tick frame item = do
      liftIO $ do
        hPutStr stderr ['\r', frame]
        hFlush stderr
      f item
60
61
62
63
-- | Process the texts of a single year.
--
-- Applies 'terms' (with the given patterns) to every text of the
-- @(year, texts)@ pair, passes the collected results to @coocOn identity@
-- and returns the resulting map. A start and a stop message tagged with the
-- year are printed around the computation.
filterTermsAndCooc
  :: TermType Lang
  -> (Int, [Text])
  -> IO (Map (Terms, Terms) Coocs)
filterTermsAndCooc patterns (year, ts) = do
  logPhase "start filterTermsAndCooc "
  extracted <- mapM (terms patterns) ts
  let coocs = coocOn identity extracted
  logPhase "stop filterTermsAndCooc "
  pure coocs
  where
    -- Print a progress label followed by the year being processed.
    logPhase label = putStrLn $ label <> show year
73
-- | Command-line entry point.
--
-- Expects exactly three arguments: the corpus CSV file, the term-list CSV
-- file and an output file (the last one is accepted but not used yet; the
-- result is only printed). With any other number of arguments a usage
-- message is written to stderr and the program exits with a failure code.
--
-- The corpus is grouped by publication year, each document contributing
-- its title and abstract joined by a space; 'filterTermsAndCooc' is then run
-- concurrently, once per year, and the list of results is printed.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [corpusFile, termListFile, _outputFile] -> do
      -- corpus :: DM.IntMap [text], keyed by publication year
      corpus <- DM.fromListWith (<>)
              . DV.toList
              . DV.map (\n -> (csv_publication_year n, [csv_title n <> " " <> csv_abstract n]))
              . snd
            <$> readCsv corpusFile

      termList <- csvGraphTermList termListFile

      -- Report how many entries the term list contains.
      putStrLn $ show $ length termList

      let patterns = WithList $ buildPatterns termList

      -- 'DM.toList' yields the (year, texts) pairs in ascending year order,
      -- which is exactly what looking up every key and zipping produced.
      r <- mapConcurrently (filterTermsAndCooc patterns) (DM.toList corpus)
      putStrLn $ show r
      -- TODO: writeFile outputFile cooc

    _ -> do
      -- Wrong number of arguments: explain the expected invocation instead
      -- of dying on an irrefutable-pattern failure.
      progName <- getProgName
      hPutStr stderr $ "Usage: " <> progName <> " CORPUS_CSV TERM_LIST_CSV OUTPUT_FILE\n"
      exitFailure