import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
-import Gargantext.Text.Context (TermList)
-import Gargantext.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
-import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
-import Gargantext.Text.List.CSV (csvGraphTermList)
-import Gargantext.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
-import Gargantext.Viz.AdaptativePhylo
-import Gargantext.Viz.Phylo.PhyloMaker (toPhylo)
-import Gargantext.Viz.Phylo.PhyloTools (printIOMsg, printIOComment)
-import Gargantext.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
--- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
+import Gargantext.Core.Text.Context (TermList)
+import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
+import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
+import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
+import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
+import Gargantext.Core.Viz.AdaptativePhylo
+import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
+import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment)
+import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
+-- import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicDistance')
import GHC.IO (FilePath)
import Prelude (Either(..))
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
-import qualified Gargantext.Text.Corpus.Parsers.CSV as Csv
+import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
---------------
-- wosToCorpus: build (publication year, text) pairs from the WOS files under `path`.
--
-- Every file name listed by getFilesFromPath is parsed concurrently with
-- `parseFile WOS (path <> file)`, and the per-file results are concatenated.
-- Only documents that have both a publication year and a title are kept, so
-- the later fromJust calls on those two fields never meet a Nothing.
-- The text of a document is `title <> " " <> abstract`, using "" when the
-- abstract is missing. At most `limit` pairs are yielded.
--
-- NOTE(review): the file path is formed by plain `path <> file`, with no
-- separator inserted; presumably `path` must end with one -- confirm with callers.
--
-- The -/+ line pairs below only rename the record accessors from
-- `_hyperdataDocument_*` to `_hd_*`.
wosToCorpus limit path = do
files <- getFilesFromPath path
take limit
- <$> map (\d -> let date' = fromJust $ _hyperdataDocument_publication_year d
- title = fromJust $ _hyperdataDocument_title d
- abstr = if (isJust $ _hyperdataDocument_abstract d)
- then fromJust $ _hyperdataDocument_abstract d
+ <$> map (\d -> let date' = fromJust $ _hd_publication_year d
+ title = fromJust $ _hd_title d
+ abstr = if (isJust $ _hd_abstract d)
+ then fromJust $ _hd_abstract d
else ""
in (date', title <> " " <> abstr))
<$> concat
<$> mapConcurrently (\file ->
- filter (\d -> (isJust $ _hyperdataDocument_publication_year d)
- && (isJust $ _hyperdataDocument_title d))
+ filter (\d -> (isJust $ _hd_publication_year d)
+ && (isJust $ _hd_title d))
<$> parseFile WOS (path <> file) ) files
Right config -> do
printIOMsg "Parse the corpus"
- mapList <- csvGraphTermList (listPath config)
+ mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "End of reconstruction, start the export"
- let dot = toPhyloExport phylo
+ let dot = toPhyloExport phylo
+
+ let clq = case (clique config) of
+ Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
+ MaxClique s -> "clique_" <> (show s)
+
+ let sensibility = case (phyloProximity config) of
+ Hamming -> undefined
+ WeightedLogJaccard s -> (show s)
+
+ let sync = case (phyloSynchrony config) of
+ ByProximityThreshold t _ _ _ -> (show t)
+ ByProximityDistribution _ _ -> undefined
+
+ -- to be improved
+ -- let br_length = case (take 1 $ exportFilter config) of
+ -- ByBranchSize t -> (show t)
+
let output = (outputPath config)
<> (unpack $ phyloName config)
- <> "_V2.dot"
+ <> "-" <> clq
+ <> "-level_" <> (show (phyloLevel config))
+ <> "-sens_" <> sensibility
+ -- <> "-length_" <> br_length
+ <> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
+ <> "-sync_" <> sync
+ <> ".dot"
dotToFile output dot