{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
+import System.Directory (doesFileExist)
+
import Data.Aeson
-import Data.Text (Text, unwords)
+import Data.Text (Text, unwords, unlines)
+import Data.List ((++),concat)
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList)
-import Gargantext.Text.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
-import qualified Gargantext.Text.Parsers.CSV as CSV
-import Gargantext.Text.Parsers (FileFormat(..),parseFile)
+import Gargantext.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
+import qualified Gargantext.Text.Corpus.Parsers.CSV as CSV
+import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Context (TermList)
+import Control.Monad (mapM)
+
import System.Environment
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
-
import Gargantext.Database.Types.Node
-
import Data.Maybe
+import Control.Concurrent.Async as CCA (mapConcurrently)
import qualified Data.Map as DM
import qualified Data.Vector as DV
type ListPath = FilePath
+type FisPath = FilePath
type CorpusPath = FilePath
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int
Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType
, listPath :: ListPath
+ , fisPath :: FilePath
, outputPath :: FilePath
, phyloName :: Text
, limit :: Limit
, timeGrain :: Int
, timeStep :: Int
+ , timeFrame :: Int
+ , timeFrameTh :: Double
, timeTh :: Double
, timeSens :: Double
+ , reBranchThr :: Double
+ , reBranchNth :: Int
, clusterTh :: Double
, clusterSens :: Double
, phyloLevel :: Int
instance FromJSON CorpusType
instance ToJSON CorpusType
+
+decoder :: P.Either a b -> b
+decoder (P.Left _) = P.error "Error"
+decoder (P.Right x) = x
+
-- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path
. filter (\d -> (isJust $_hyperdataDocument_publication_year d)
&& (isJust $_hyperdataDocument_title d)
&& (isJust $_hyperdataDocument_abstract d))
- <$> parseFile WOS path
+ . concat
+ <$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20]
-- | To use the correct parser given a CorpusType
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
+-- | To parse an existing Fis file
+parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
+parseFis path name grain step support clique = do
+ fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
+ if fisExists
+ then do
+ fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
+ case fisJson of
+ P.Left err -> do
+ putStrLn err
+ pure []
+ P.Right fis -> pure fis
+ else pure []
+
+writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
+writeFis path name grain step support clique fis = do
+ let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
+ L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
+
--------------
-- | Main | --
--------------
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
- let roots = DL.nub $ DL.concat $ map text corpus
+ putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
+
+ fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
- putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
+ putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
+
+ let fis' = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
- (Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf)
+ (Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
+ (reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
- let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
+ let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth,BranchGroups] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
+
+ let phylo = toPhylo query corpus termList fis'
- let phylo = toPhylo query corpus roots termList
+ writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo