Mainly reexport functions in @Data.Text.Metrics@
-
-TODO
-noApax :: Ord a => Map a Occ -> Map a Occ
-noApax m = M.filter (>1) m
-
-}
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
-module Gargantext.Text.Metrics
+module Gargantext.Text.Metrics
where
-import Data.Text (Text, pack)
-import Data.Map (Map)
-
-import qualified Data.List as L
-import qualified Data.Map as M
-import qualified Data.Set as S
-import qualified Data.Text as T
-import Data.Tuple.Extra (both)
---import GHC.Real (Ratio)
---import qualified Data.Text.Metrics as DTM
-import Data.Array.Accelerate (toList)
+--import Data.Array.Accelerate ((:.)(..), Z(..))
+--import Math.KMeans (kmeans, euclidSq, elements)
+--import GHC.Float (exp)
+import Data.Map (Map)
+import Data.List.Extra (sortOn)
+import GHC.Real (round)
import Gargantext.Prelude
-
-import Gargantext.Text.Metrics.Count (occurrences, cooc)
-import Gargantext.Text.Terms (TermType(MonoMulti), terms)
-import Gargantext.Core (Lang(EN))
-import Gargantext.Core.Types (Terms(..))
-import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
-
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
+import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
+import qualified Data.Array.Accelerate as DAA
+import qualified Data.Array.Accelerate.Interpreter as DAA
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.Vector.Storable as Vec
--- ord relevance: top n plus inclus
--- échantillonnage de généricity
---
---filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
---filterCooc m =
----- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
-----(ti, fi) = createIndices m
--- . fromIndex fi $ filterMat $ cooc2mat ti m
+type GraphListSize = Int
+type InclusionSize = Int
+toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
+toScored = map2scored
+ . (pcaReduceTo (Dimension 2))
+ . (Map.filter (\v -> Vec.length v > 1))
+ . (Map.unionsWith (<>))
-import Data.Array.Accelerate (Matrix)
-
-filterMat :: Matrix Int -> [(Index, Index)]
-filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
- where
- (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
- n = nIe + nSg
- nIe = 30
- nSg = 70
-
-
-
-incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
-incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
- where
- (ti,fi) = createIndices m
- ordonne x = L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x)
+scored :: Ord t => Map (t,t) Int -> [Scored t]
+scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
+scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
+scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
+map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
+map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
-metrics_text :: Text
-metrics_text = T.intercalate " " metrics_sentences
+-- TODO change type with (x,y)
+data Scored ts = Scored
+ { _scored_terms :: !ts
+ , _scored_incExc :: !InclusionExclusion
+ , _scored_speGen :: !SpecificityGenericity
+ } deriving (Show)
-metrics_sentences' :: [Text]
-metrics_sentences' = splitBy (Sentences 0) metrics_text
-
--- | Sentences
-metrics_sentences :: [Text]
-metrics_sentences = [ "There is a table with a glass of wine and a spoon."
- , "I can see the glass on the table."
- , "There was only a spoon on that table."
- , "The glass just fall from the table, pouring wine everywhere."
- , "I wish the glass did not contain wine."
- ]
-
-metrics_sentences_Test = metrics_sentences == metrics_sentences'
-
--- | Terms reordered to visually check occurrences
--- >>>
-{- [ [["table"],["glass"],["wine"],["spoon"]]
- , [["glass"],["table"]]
- , [["spoon"],["table"]]
- , [["glass"],["table"],["wine"]]
- , [["glass"],["wine"]]
- ]
--}
-
-metrics_terms :: IO [[Terms]]
-metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
-
--- | Occurrences
-{-
-fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
- , (fromList ["object"],fromList [(["object"], 3 )])
- , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
- , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
--}
-metrics_occ = occurrences <$> L.concat <$> metrics_terms
-
-{-
--- fromList [((["glas"],["object"]),6)
- ,((["glas"],["spoon"]),4)
- ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-
--}
-metrics_cooc = cooc <$> metrics_terms
-
-metrics_cooc_mat = do
- m <- metrics_cooc
- let (ti,_) = createIndices m
- let mat_cooc = cooc2mat ti m
- pure ( ti
- , mat_cooc
- , incExcSpeGen_proba mat_cooc
- , incExcSpeGen mat_cooc
- )
-
-metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
+localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
+localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
+ (Map.toList fi)
+ scores
+ where
+ (ti, fi) = createIndices m
+ (is, ss) = incExcSpeGen $ cooc2mat ti m
+ scores = DAA.toList
+ $ DAA.run
+ $ DAA.zip (DAA.use is) (DAA.use ss)
+
+
+-- TODO Code to be remove below
+-- TODO in the textflow we end up needing these indices , it might be
+-- better to compute them earlier and pass them around.
+scored' :: Ord t => Map (t,t) Int -> [Scored t]
+scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
+ where
+ (ti, fi) = createIndices m
+ (is, ss) = incExcSpeGen $ cooc2mat ti m
+ scores = DAA.toList
+ $ DAA.run
+ $ DAA.zip (DAA.use is) (DAA.use ss)
+
+
+takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
+takeScored listSize incSize = map _scored_terms
+ . linearTakes listSize incSize _scored_speGen
+ _scored_incExc
+ . scored
+
+
+-- | Filter Scored data
+-- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
+-- [(3,8),(6,5)]
+linearTakes :: (Ord b1, Ord b2)
+ => GraphListSize -> InclusionSize
+ -> (a -> b2) -> (a -> b1) -> [a] -> [a]
+linearTakes gls incSize speGen incExc = take gls
+ . List.concat
+ . map (take $ round
+ $ (fromIntegral gls :: Double)
+ / (fromIntegral incSize :: Double)
+ )
+ . map (sortOn incExc)
+ . splitEvery incSize
+ . sortOn speGen