{-| Module : Gargantext.Core.Text.Metrics Description : All parsers of Gargantext in one file. Copyright : (c) CNRS, 2017 - present License : AGPL + CECILL v3 Maintainer : team@gargantext.org Stability : experimental Portability : POSIX Mainly reexport functions in @Data.Text.Metrics@ -} {-# LANGUAGE BangPatterns #-} module Gargantext.Core.Text.Metrics where --import Data.Array.Accelerate ((:.)(..), Z(..)) --import Math.KMeans (kmeans, euclidSq, elements) import Data.Map (Map) import Gargantext.Prelude import Gargantext.Core.Viz.Graph.Distances.Matrice import Gargantext.Core.Viz.Graph.Index import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..)) import qualified Data.Array.Accelerate as DAA import qualified Data.Array.Accelerate.Interpreter as DAA import qualified Data.Map as Map import qualified Data.Vector.Storable as Vec type MapListSize = Int type InclusionSize = Int scored :: Ord t => Map (t,t) Int -> [Scored t] scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map where scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double) scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t] map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList -- TODO change type with (x,y) data Scored ts = Scored { _scored_terms :: !ts , _scored_genInc :: !GenericityInclusion , _scored_speExc :: !SpecificityExclusion } deriving (Show) localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double) localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe])) (Map.toList fi) scores where (ti, fi) = createIndices m (is, ss) = incExcSpeGen $ cooc2mat ti m scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss) -- TODO Code to be removed below -- TODO in the textflow we end up needing these indices , it might be -- better to compute them earlier and pass them around. scored' :: Ord t => Map (t,t) Int -> [Scored t] scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores where (ti, fi) = createIndices m (is, ss) = incExcSpeGen $ cooc2mat ti m scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss) normalizeGlobal :: [Scored a] -> [Scored a] normalizeGlobal ss = map (\(Scored t s1 s2) -> Scored t ((s1 - s1min) / s1max) ((s2 - s2min) / s2max)) ss where ss1 = map _scored_genInc ss ss2 = map _scored_speExc ss s1min = minimum ss1 s1max = maximum ss1 s2min = minimum ss2 s2max = maximum ss2 normalizeLocal :: Scored a -> Scored a normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2) where log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)