-}
-{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Flow
where
-import Control.Monad.Reader
-import GHC.IO (FilePath)
import qualified Data.Text as T
-import Data.Text.IO (readFile)
-
-import Data.Map.Strict (Map)
-import Data.Maybe (catMaybes)
-import qualified Data.Set as DS
-import Data.Text (Text)
-
-import qualified Data.Array.Accelerate as A
-import qualified Data.Map.Strict as M
-----------------------------------------------
+--import Data.Text.IO (readFile)
import Database.PostgreSQL.Simple (Connection)
-
-import Gargantext.Database.Schema.Node
-import Gargantext.Database.Types.Node
-import Gargantext.Database.Utils (Cmd, mkCmd)
-
-import Gargantext.Core (Lang)
-import Gargantext.Prelude
-
-import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
-import Gargantext.Viz.Graph.Distances.Matrice (measureConditional)
-import Gargantext.Viz.Graph (Graph(..), data2graph)
-import Gargantext.Text.Metrics.Count (cooc)
-import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
-import Gargantext.Text.Terms (TermType, extractTerms)
-import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
+import GHC.IO (FilePath)
+--import Gargantext.Core (Lang)
import Gargantext.Core.Types (CorpusId)
-import Gargantext.Text.Parsers.CSV
-
-import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, l_community_id)
-
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| DBV3 Connection CorpusId
| Query T.Text
-
+{-
textFlow :: TermType Lang -> TextFlow -> IO Graph
textFlow termType workType = do
contexts <- case workType of
-- TermsType = Mono | Multi | MonoMulti
-- myterms # filter (\t -> not . elem t stopList)
-- # groupBy (Stem|GroupList|Ontology)
- printDebug "terms" myterms
- printDebug "myterms" (sum $ map length myterms)
+ --printDebug "terms" myterms
+ --printDebug "myterms" (sum $ map length myterms)
-- Bulding the map list
-- compute copresences of terms, i.e. cooccurrences of terms in same context of text
-- Cooc = Map (Term, Term) Int
- let myCooc1 = cooc myterms
- printDebug "myCooc1 size" (M.size myCooc1)
+ let myCooc1 = coocOn (_terms_label) myterms
+ --printDebug "myCooc1 size" (M.size myCooc1)
-- Remove Apax: appears one time only => lighting the matrix
- let myCooc2 = M.filter (>0) myCooc1
- printDebug "myCooc2 size" (M.size myCooc2)
- printDebug "myCooc2" myCooc2
+ let myCooc2 = Map.filter (>0) myCooc1
+ --printDebug "myCooc2 size" (M.size myCooc2)
+ --printDebug "myCooc2" myCooc2
g <- cooc2graph myCooc2
pure g
-
--- TODO use Text only here instead of [Text]
-cooc2graph :: (Map ([Text], [Text]) Int) -> IO Graph
-cooc2graph myCooc = do
-
- -- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
- let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
- (InclusionSize 500 )
- (SampleBins 10 )
- (Clusters 3 )
- (DefaultValue 0 )
- ) myCooc
- printDebug "myCooc3 size" $ M.size myCooc3
- printDebug "myCooc3" myCooc3
-
- -- Cooc -> Matrix
- let (ti, _) = createIndices myCooc3
- printDebug "ti size" $ M.size ti
- printDebug "ti" ti
-
- let myCooc4 = toIndex ti myCooc3
- printDebug "myCooc4 size" $ M.size myCooc4
- printDebug "myCooc4" myCooc4
-
- let matCooc = map2mat (0) (M.size ti) myCooc4
- printDebug "matCooc shape" $ A.arrayShape matCooc
- printDebug "matCooc" matCooc
-
- -- Matrix -> Clustering
- let distanceMat = measureConditional matCooc
- --let distanceMat = distributional matCooc
- printDebug "distanceMat shape" $ A.arrayShape distanceMat
- printDebug "distanceMat" distanceMat
-
- --let distanceMap = M.filter (>0) $ mat2map distanceMat
- let distanceMap = M.map (\_ -> 1) $ M.filter (>0) $ mat2map distanceMat
- printDebug "distanceMap size" $ M.size distanceMap
- printDebug "distanceMap" distanceMap
-
--- let distance = fromIndex fi distanceMap
--- printDebug "distance" $ M.size distance
-
- partitions <- cLouvain distanceMap
--- Building : -> Graph -> JSON
- printDebug "partitions" $ DS.size $ DS.fromList $ map (l_community_id) partitions
- --printDebug "partitions" partitions
- pure $ data2graph (M.toList ti) myCooc4 distanceMap partitions
+-}