3 Description : Gargantext starter
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Main specifications to index a corpus with a term list
14 {-# LANGUAGE DataKinds #-}
15 {-# LANGUAGE DeriveGeneric #-}
16 {-# LANGUAGE FlexibleInstances #-}
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19 {-# LANGUAGE StandaloneDeriving #-}
20 {-# LANGUAGE TypeOperators #-}
21 {-# LANGUAGE Strict #-}
25 import qualified Data.Vector as DV
27 import Data.Text (Text)
28 import System.Environment
29 --import Control.Concurrent.Async as CCA (mapConcurrently)
31 import Gargantext.Prelude
32 import Gargantext.Text.Context
33 import Gargantext.Text.Terms
34 import Gargantext.Text.Terms.WithList
35 import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract)
36 import Gargantext.Text.List.CSV (csvGraphTermList)
37 import Gargantext.Text.Terms (terms)
38 import Gargantext.Text.Metrics.Count (cooc)
-- Entry-point body: read a CSV corpus, index every document against a term
-- list, then print the indexed corpus and the result of `cooc` over it.
--
-- Exactly three command-line arguments are expected, in this order: corpus
-- CSV path, term-list CSV path, output path. The list pattern below is
-- failable, so any other argument count makes the bind fail at runtime.
-- NOTE(review): outputFile is bound but only mentioned in the commented-out
-- writeFile at the end, so nothing is written to it yet.
42 [corpusFile, termListFile, outputFile] <- getArgs
-- Read the corpus and reduce each CSV record to a single text,
-- csv_title <> " " <> csv_abstract; DV.toList then yields a plain list.
-- NOTE(review): `map` is applied before DV.toList, so it is presumably the
-- Functor-generalised map exported by Gargantext.Prelude -- confirm.
45 corpus <- DV.toList <$> map (\n -> (csv_title n) <> " " <> (csv_abstract n))
47 <$> readCsv corpusFile
-- Print the number of corpus entries.
49 putStrLn $ show $ length corpus
50 -- termListMap :: [Text]
-- Load the term list from the second argument.
51 termList <- csvGraphTermList termListFile
-- Print the number of term-list entries.
53 putStrLn $ show $ length termList
-- Run `terms` in WithList mode, with patterns built from termList by
-- buildPatterns, over every corpus text; mapM keeps the results in corpus order.
55 corpusIndexed <- mapM (terms (WithList $ buildPatterns termList)) corpus
-- Print the indexed corpus as rendered by `show`.
57 putStrLn $ show corpusIndexed
-- Apply cooc (from Gargantext.Text.Metrics.Count; presumably co-occurrence
-- counts -- confirm) to the indexed corpus.
58 let myCooc = cooc corpusIndexed
-- Print the cooc result as rendered by `show`.
60 putStrLn $ show myCooc
-- Writing the result to outputFile is currently disabled.
-- NOTE(review): the commented-out call names `cooc` (the function), not
-- `myCooc` (the computed value); presumably myCooc is intended -- verify
-- before re-enabling.
61 --writeFile outputFile cooc