-}
-{-# LANGUAGE DataKinds #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
-import System.Directory (doesFileExist)
-
+import Control.Concurrent.Async as CCA (mapConcurrently)
+import Control.Monad (mapM)
import Data.Aeson
-import Data.Text (Text, unwords)
-import Data.List ((++))
+import Data.List ((++),concat)
+import Data.Maybe
+import Data.Text (Text, unwords, unlines)
import GHC.Generics
import GHC.IO (FilePath)
+import Gargantext.Database.Admin.Types.Hyperdata
+import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
-import Gargantext.Text.List.CSV (csvGraphTermList)
-import Gargantext.Text.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
-import qualified Gargantext.Text.Parsers.CSV as CSV
-import Gargantext.Text.Parsers (FileFormat(..),parseFile)
-import Gargantext.Text.Terms.WithList
-import Gargantext.Text.Context (TermList)
-
+import Gargantext.Core.Text.Context (TermList)
+import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
+import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
+import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
+import Gargantext.Core.Text.Terms.WithList
+import Gargantext.Core.Viz.Phylo
+import Gargantext.Core.Viz.Phylo.LevelMaker
+import Gargantext.Core.Viz.Phylo.Tools
+import Gargantext.Core.Viz.Phylo.View.Export
+import Gargantext.Core.Viz.Phylo.View.ViewMaker
+import System.Directory (doesFileExist)
import System.Environment
-
-import Gargantext.Viz.Phylo
-import Gargantext.Viz.Phylo.Tools
-import Gargantext.Viz.Phylo.LevelMaker
-import Gargantext.Viz.Phylo.View.Export
-import Gargantext.Viz.Phylo.View.ViewMaker
-
-import Gargantext.Database.Types.Node
-import Data.Maybe
-
-import qualified Data.Map as DM
-import qualified Data.Vector as DV
+import qualified Data.ByteString.Lazy as L
import qualified Data.List as DL
+import qualified Data.Map as DM
import qualified Data.Text as DT
+import qualified Data.Vector as DV
+import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
import qualified Prelude as P
-import qualified Data.ByteString.Lazy as L
--------------
where
--------------------------------------
-- | Collect the distinct terms found in @txt@ for the patterns @pats@.
-- Every innermost token list returned by 'extractTermsWithList' is joined
-- into a single 'Text' with 'unwords', the nested result is flattened with
-- 'DL.concat', and duplicates are dropped with 'DL.nub'.
-- The removed and added variants below differ only in line layout.
termsInText :: Patterns -> Text -> [Text]
- termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt
+ termsInText pats txt = DL.nub
+ $ DL.concat
+ $ map (map unwords)
+ $ extractTermsWithList pats txt
--------------------------------------
-- | To transform a Wos file into a readable corpus.
-- Parses documents with @parseFile WOS@, keeps only those whose publication
-- year, title and abstract are all present, turns each one into a pair of
-- the year and the title followed by a space and the abstract, and returns
-- at most @limit@ of those pairs.
-- The 'fromJust' calls cannot fail here: the 'isJust' filter is applied
-- before the 'map' (the composition runs right to left).
-- Removed variant: parses the single file at @path@.
-- Added variant: parses the files named @path@ followed by an index from
-- 1 to 20 and the ".txt" suffix, concurrently via 'mapConcurrently', then
-- concatenates the results; @path@ therefore acts as a file-name prefix.
-- NOTE(review): the number of files (20) is hard-coded; presumably a missing
-- numbered file makes 'parseFile' fail -- confirm against the parser.
wosToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
wosToCorpus limit path = DL.take limit
- . map (\d -> ((fromJust $_hyperdataDocument_publication_year d)
- ,(fromJust $_hyperdataDocument_title d) <> " " <> (fromJust $_hyperdataDocument_abstract d)))
- . filter (\d -> (isJust $_hyperdataDocument_publication_year d)
- && (isJust $_hyperdataDocument_title d)
- && (isJust $_hyperdataDocument_abstract d))
- <$> parseFile WOS path
+ . map (\d -> ((fromJust $_hd_publication_year d)
+ ,(fromJust $_hd_title d) <> " " <> (fromJust $_hd_abstract d)))
+ . filter (\d -> (isJust $_hd_publication_year d)
+ && (isJust $_hd_title d)
+ && (isJust $_hd_abstract d))
+ . concat
+ <$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20]
-- | To use the correct parser given a CorpusType
P.Left err -> putStrLn err
P.Right conf -> do
- termList <- csvGraphTermList (listPath conf)
+ termList <- csvMapTermList (listPath conf)
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
- putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
-
- let roots = DL.nub $ DL.concat $ map text corpus
-
- putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
+ putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
- let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
+ let fis' = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
- let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
+ let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth,BranchGroups] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
- let phylo = toPhylo query corpus roots termList mFis
+ let phylo = toPhylo query corpus termList fis'
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)