module Main where
import Data.Aeson
-import Data.ByteString.Lazy (ByteString)
-import Data.Maybe (isJust, fromJust)
-import Data.List (concat, nub, isSuffixOf, take)
+import Data.List (concat, nub, isSuffixOf)
import Data.String (String)
-import Data.Text (Text, unwords, unpack)
+import Data.Text (Text, unwords, unpack, replace)
+import Crypto.Hash.SHA256 (hash)
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
-import Gargantext.Text.Context (TermList)
-import Gargantext.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
-import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
-import Gargantext.Text.List.CSV (csvMapTermList)
-import Gargantext.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
-import Gargantext.Viz.AdaptativePhylo
-import Gargantext.Viz.Phylo.PhyloMaker (toPhylo)
-import Gargantext.Viz.Phylo.PhyloTools (printIOMsg, printIOComment)
-import Gargantext.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
--- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
+import Gargantext.Core.Text.Context (TermList)
+import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
+import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
+import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
+import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
+import Gargantext.Core.Viz.AdaptativePhylo
+import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
+import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
+import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import GHC.IO (FilePath)
-import Prelude (Either(..))
+import Prelude (Either(Left, Right))
import System.Environment
-import System.Directory (listDirectory)
+import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently)
+import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
-import qualified Gargantext.Text.Corpus.Parsers.CSV as Csv
+import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
+import qualified Data.Text as T
+
+
+data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
+
---------------
-- | To read and decode a Json file
-readJson :: FilePath -> IO ByteString
+readJson :: FilePath -> IO Lazy.ByteString
readJson path = Lazy.readFile path
wosToCorpus limit path = do
files <- getFilesFromPath path
take limit
- <$> map (\d -> let date' = fromJust $ _hyperdataDocument_publication_year d
- title = fromJust $ _hyperdataDocument_title d
- abstr = if (isJust $ _hyperdataDocument_abstract d)
- then fromJust $ _hyperdataDocument_abstract d
+ <$> map (\d -> let date' = fromJust $ _hd_publication_year d
+ title = fromJust $ _hd_title d
+ abstr = if (isJust $ _hd_abstract d)
+ then fromJust $ _hd_abstract d
else ""
in (date', title <> " " <> abstr))
<$> concat
<$> mapConcurrently (\file ->
- filter (\d -> (isJust $ _hyperdataDocument_publication_year d)
- && (isJust $ _hyperdataDocument_title d))
+ filter (\d -> (isJust $ _hd_publication_year d)
+ && (isJust $ _hd_title d))
<$> parseFile WOS (path <> file) ) files
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
+-- Config time parameters to label
+timeToLabel :: Config -> [Char]
+timeToLabel config = case (timeUnit config) of
+ Year p s f -> ("time"<> "_"<> (show p) <> "_" <> (show s) <> "_" <> (show f))
+
+
+seaToLabel :: Config -> [Char]
+seaToLabel config = case (seaElevation config) of
+ Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
+ Adaptative granularity -> ("sea_adapt" <> (show granularity))
+
+
+sensToLabel :: Config -> [Char]
+sensToLabel config = case (phyloProximity config) of
+ Hamming -> undefined
+ WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
+ WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
+
+
+cliqueToLabel :: Config -> [Char]
+cliqueToLabel config = case (clique config) of
+ Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
+ MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
+
+
+syncToLabel :: Config -> [Char]
+syncToLabel config = case (phyloSynchrony config) of
+ ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
+ ByProximityDistribution _ _ -> undefined
+
+qualToConfig :: Config -> [Char]
+qualToConfig config = case (phyloQuality config) of
+ Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
+
+
+-- To set up the export file's label from the configuration
+configToLabel :: Config -> [Char]
+configToLabel config = outputPath config
+ <> (unpack $ phyloName config)
+ <> "-" <> (timeToLabel config)
+ <> "-scale_" <> (show (phyloLevel config))
+ <> "-" <> (seaToLabel config)
+ <> "-" <> (sensToLabel config)
+ <> "-" <> (cliqueToLabel config)
+ <> "-level_" <> (show (_qua_granularity $ phyloQuality config))
+ <> "-" <> (syncToLabel config)
+ <> ".dot"
+
+
+-- To write a sha256 from a set of config's parameters
+configToSha :: PhyloStage -> Config -> [Char]
+configToSha stage config = unpack
+ $ replace "/" "-"
+ $ T.pack (show (hash $ C8.pack label))
+ where
+ label :: [Char]
+ label = case stage of
+ PhyloWithCliques -> (corpusPath config)
+ <> (listPath config)
+ <> (timeToLabel config)
+ <> (cliqueToLabel config)
+ PhyloWithLinks -> (corpusPath config)
+ <> (listPath config)
+ <> (timeToLabel config)
+ <> (cliqueToLabel config)
+ <> (sensToLabel config)
+ <> (seaToLabel config)
+ <> (syncToLabel config)
+ <> (qualToConfig config)
+ <> (show (phyloLevel config))
+
+
+writePhylo :: [Char] -> Phylo -> IO ()
+writePhylo path phylo = Lazy.writeFile path $ encode phylo
+
+
+readPhylo :: [Char] -> IO Phylo
+readPhylo path = do
+ phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
+ case phyloJson of
+ Left err -> do
+ putStrLn err
+ undefined
+ Right phylo -> pure phylo
+
+
--------------
-- | Main | --
--------------
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
- printIOMsg "Reconstruct the Phylo"
-
- let phylo = toPhylo corpus mapList config
+ printIOMsg "Reconstruct the phylo"
+
+ let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
+ let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
+
+ phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
+ phyloWithLinksExists <- doesFileExist phyloWithLinksFile
+
+ -- phyloStep <- if phyloWithCliquesExists
+ -- then do
+ -- printIOMsg "Reconstruct the phylo step from an existing file"
+ -- readPhylo phyloWithCliquesFile
+ -- else do
+ -- printIOMsg "Reconstruct the phylo step from scratch"
+ -- pure $ toPhyloStep corpus mapList config
+
+ -- writePhylo phyloWithCliquesFile phyloStep
+
+ -- let phylo = toPhylo (setConfig config phyloStep)
+
+ phyloWithLinks <- if phyloWithLinksExists
+ then do
+ printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
+ readPhylo phyloWithLinksFile
+ else do
+ if phyloWithCliquesExists
+ then do
+ printIOMsg "Reconstruct the phylo from an existing file with cliques"
+ phyloWithCliques <- readPhylo phyloWithCliquesFile
+ writePhylo phyloWithCliquesFile phyloWithCliques
+ pure $ toPhylo (setConfig config phyloWithCliques)
+ else do
+ printIOMsg "Reconstruct the phylo from scratch"
+ phyloWithCliques <- pure $ toPhyloStep corpus mapList config
+ writePhylo phyloWithCliquesFile phyloWithCliques
+ pure $ toPhylo (setConfig config phyloWithCliques)
+
+ writePhylo phyloWithLinksFile phyloWithLinks
+
-- | probes
printIOMsg "End of reconstruction, start the export"
- let dot = toPhyloExport phylo
-
- let clq = case (clique config) of
- Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
- MaxClique s -> "clique_" <> (show s)
-
- let sensibility = case (phyloProximity config) of
- Hamming -> undefined
- WeightedLogJaccard s -> (show s)
-
- let output = (outputPath config)
- <> (unpack $ phyloName config)
- <> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
- <> "-level_" <> (show (phyloLevel config))
- <> "-" <> clq
- <> "-sens_" <> sensibility
- <> ".dot"
+ let dot = toPhyloExport (setConfig config phyloWithLinks)
+
+ let output = configToLabel config
dotToFile output dot