]> Git — Sourcephile - gargantext.git/blob - bin/gargantext-cli/Main.hs
[Index with TermList] compiles but weird behavior.
[gargantext.git] / bin / gargantext-cli / Main.hs
1 {-|
2 Module : Main.hs
3 Description : Gargantext starter
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Main specifications to index a corpus with a term list
11
12 -}
13
14 {-# LANGUAGE DataKinds #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19 {-# LANGUAGE StandaloneDeriving #-}
20 {-# LANGUAGE TypeOperators #-}
21 {-# LANGUAGE Strict #-}
22
23 module Main where
24
import qualified Data.Vector as DV

import Data.Text (Text)
import System.Environment
import qualified System.Exit as Exit
--import Control.Concurrent.Async as CCA (mapConcurrently)

import Gargantext.Prelude
import Gargantext.Text.Context
import Gargantext.Text.Terms
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract)
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (cooc)
39
-- | Index a CSV corpus with a term list and print the co-occurrences.
--
-- Expects exactly three command-line arguments, in this order:
--
--   1. the corpus CSV file,
--   2. the term-list CSV file,
--   3. an output file (accepted but not written yet: the write step is
--      still commented out at the end of the pipeline).
--
-- With any other number of arguments the program reports its usage via
-- 'Exit.die' instead of aborting on a failed pattern match.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [corpusFile, termListFile, _outputFile] -> do
      -- One text per CSV record: title and abstract joined by a space.
      -- The records are taken from the second component of 'readCsv'.
      corpus <- DV.toList <$> map (\n -> (csv_title n) <> " " <> (csv_abstract n))
                          <$> snd
                          <$> readCsv corpusFile
      putStrLn $ show $ length corpus

      termList <- csvGraphTermList termListFile
      putStrLn $ show $ length termList

      -- Extract, for every document, the terms matching the list patterns.
      corpusIndexed <- mapM (terms (WithList $ buildPatterns termList)) corpus
      putStrLn $ show corpusIndexed

      let myCooc = cooc corpusIndexed
      putStrLn $ show myCooc
      -- TODO: write the result to the output file, e.g.
      -- writeFile _outputFile myCooc
    _ -> do
      -- Wrong number of arguments: explain the expected invocation.
      prog <- getProgName
      Exit.die $ "Usage: " <> prog <> " CORPUS_CSV TERMLIST_CSV OUTPUT_FILE"