parameter change in the sensibility for WeightedLogSim and WeightedLogJaccard similar...

[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
diff --git a/src/Gargantext/Core/Text/Metrics.hs b/src/Gargantext/Core/Text/Metrics.hs

index 03f3399d3eb11b8d39ce276f070b40bd78c54206..4a907b069af19e394d01db000bbefa0f9c03f33b 100644 (file)
--- a/src/Gargantext/Core/Text/Metrics.hs
+++ b/src/Gargantext/Core/Text/Metrics.hs
@@ -18,18 +18,13 @@ module Gargantext.Core.Text.Metrics
  
  --import Data.Array.Accelerate ((:.)(..), Z(..))
  --import Math.KMeans (kmeans, euclidSq, elements)
-
---import GHC.Float (exp)
-import Data.Tuple.Extra (both)
  import Data.Map (Map)
-import Data.List.Extra (sortOn)
  import Gargantext.Prelude
  import Gargantext.Core.Viz.Graph.Distances.Matrice
  import Gargantext.Core.Viz.Graph.Index
  import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
  import qualified Data.Array.Accelerate as DAA
  import qualified Data.Array.Accelerate.Interpreter as DAA
-import qualified Data.List as List
  import qualified Data.Map  as Map
  
  import qualified Data.Vector.Storable as Vec
@@ -37,14 +32,6 @@ import qualified Data.Vector.Storable as Vec
  type MapListSize = Int
  type InclusionSize = Int
  
-{-
-toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t] 
-toScored' = map2scored
-         . (pcaReduceTo (Dimension 2))
-         . (Map.filter (\v -> Vec.length v > 1))
-         . (Map.unionsWith (<>))
--}
-
  scored :: Ord t => Map (t,t) Int -> [Scored t]
  scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
    where
@@ -57,10 +44,11 @@ scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
  -- TODO change type with (x,y)
  data Scored ts = Scored
    { _scored_terms  :: !ts
-  , _scored_incExc :: !InclusionExclusion
-  , _scored_speGen :: !SpecificityGenericity
+  , _scored_genInc :: !GenericityInclusion
+  , _scored_speExc :: !SpecificityExclusion
    } deriving (Show)
  
+
  localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
  localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
                                           (Map.toList fi)
@@ -76,7 +64,7 @@ localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [
  -- TODO in the textflow we end up needing these indices , it might be
  -- better to compute them earlier and pass them around.
  scored' :: Ord t => Map (t,t) Int -> [Scored t]
-scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
+scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
    where
      (ti, fi) = createIndices m
      (is, ss) = incExcSpeGen $ cooc2mat ti m
@@ -85,39 +73,28 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
               $ DAA.zip (DAA.use is) (DAA.use ss)
  
  
-takeScored :: Ord t => MapListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
-takeScored listSize incSize = both (map _scored_terms)
-                            . takeLinear listSize incSize _scored_speGen
-                                                          _scored_incExc
-                            . scored
-
-
--- | Filter Scored data
--- >>> takeLinear 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
--- [(3,8),(6,5)]
-takeLinear :: (Ord b1, Ord b2)
-            => MapListSize -> InclusionSize
-            -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
-takeLinear mls incSize speGen incExc = (List.splitAt mls)
-                      . List.concat
-                      . map (take $ round
-                                  $ (fromIntegral mls     :: Double)
-                                  / (fromIntegral incSize :: Double)
-                             )
-                      . map (sortOn speGen)
-                      . splitEvery incSize
-                      . take 5000
-                      . takePercent (0.70)
-                      . sortOn incExc
-
-takePercent :: Double -> [a] -> [a]
-takePercent l xs = List.take l' xs
+normalizeGlobal :: [Scored a] -> [Scored a]
+normalizeGlobal ss = map (\(Scored t s1 s2)
+                     -> Scored t ((s1 - s1min) / s1max)
+                                 ((s2 - s2min) / s2max)) ss
    where
-    l' = round $ l * (fromIntegral $ List.length xs)
+    ss1 = map _scored_genInc ss
+    ss2 = map _scored_speExc ss
+
+    s1min = minimum ss1
+    s1max = maximum ss1
+
+    s2min = minimum ss2
+    s2max = maximum ss2
  
-splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
-splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
+
+
+normalizeLocal :: Scored a -> Scored a
+normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
    where
-    (mpa, ca) = List.splitAt a $ List.filter af xs
-    (mpb, cb) = List.splitAt b $ List.filter bf xs
+    log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
+
+
+
+