src/Gargantext/Core/Text/Metrics.hs

   1 {-|
   2 Module      : Gargantext.Core.Text.Metrics
   3 Description : All parsers of Gargantext in one file.
   4 Copyright   : (c) CNRS, 2017 - present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Mainly reexport functions in @Data.Text.Metrics@
  11
  12 -}
  13
  14 {-# LANGUAGE BangPatterns      #-}
  15
  16 module Gargantext.Core.Text.Metrics
  17   where
  18
  19 --import Data.Array.Accelerate ((:.)(..), Z(..))
  20 --import Math.KMeans (kmeans, euclidSq, elements)
  21
  22 --import GHC.Float (exp)
  23 import Data.Tuple.Extra (both)
  24 import Data.Map (Map)
  25 import Data.List.Extra (sortOn)
  26 import Gargantext.Prelude
  27 import Gargantext.Core.Viz.Graph.Distances.Matrice
  28 import Gargantext.Core.Viz.Graph.Index
  29 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
  30 import qualified Data.Array.Accelerate as DAA
  31 import qualified Data.Array.Accelerate.Interpreter as DAA
  32 import qualified Data.List as List
  33 import qualified Data.Map  as Map
  34
  35 import qualified Data.Vector.Storable as Vec
  36
  37 type MapListSize = Int
  38 type InclusionSize = Int
  39
  40 {-
  41 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
  42 toScored' = map2scored
  43          . (pcaReduceTo (Dimension 2))
  44          . (Map.filter (\v -> Vec.length v > 1))
  45          . (Map.unionsWith (<>))
  46 -}
  47
  48 scored :: Ord t => Map (t,t) Int -> [Scored t]
  49 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
  50   where
  51     scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
  52     scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
  53
  54     map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
  55     map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
  56
  57 -- TODO change type with (x,y)
  58 data Scored ts = Scored
  59   { _scored_terms  :: !ts
  60   , _scored_genInc :: !GenericityInclusion
  61   , _scored_speExc :: !SpecificityExclusion
  62   } deriving (Show)
  63
  64 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
  65 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
  66                                          (Map.toList fi)
  67                                           scores
  68   where
  69     (ti, fi) = createIndices m
  70     (is, ss) = incExcSpeGen $ cooc2mat ti m
  71     scores   = DAA.toList
  72              $ DAA.run
  73              $ DAA.zip (DAA.use is) (DAA.use ss)
  74
  75 -- TODO Code to be removed below
  76 -- TODO in the textflow we end up needing these indices , it might be
  77 -- better to compute them earlier and pass them around.
  78 scored' :: Ord t => Map (t,t) Int -> [Scored t]
  79 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
  80   where
  81     (ti, fi) = createIndices m
  82     (is, ss) = incExcSpeGen $ cooc2mat ti m
  83     scores   = DAA.toList
  84              $ DAA.run
  85              $ DAA.zip (DAA.use is) (DAA.use ss)
  86
  87
  88 takeScored :: Ord t => MapListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
  89 takeScored listSize incSize = both (map _scored_terms)
  90                             . takeLinear listSize incSize _scored_genInc
  91                                                           _scored_speExc
  92                             . scored
  93
  94
  95 -- | Filter Scored data
  96 -- >>> takeLinear 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
  97 -- [(3,8),(6,5)]
  98 takeLinear :: (Ord b1, Ord b2)
  99             => MapListSize -> InclusionSize
 100             -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
 101 takeLinear mls incSize speGen incExc = (List.splitAt mls)
 102                       . List.concat
 103                       . map (take $ round
 104                                   $ (fromIntegral mls     :: Double)
 105                                   / (fromIntegral incSize :: Double)
 106                              )
 107                       . map (sortOn speGen)
 108                       . splitEvery incSize
 109                       . take 5000
 110                       . takePercent (0.70)
 111                       . sortOn incExc
 112
 113 takePercent :: Double -> [a] -> [a]
 114 takePercent l xs = List.take l' xs
 115   where
 116     l' = round $ l * (fromIntegral $ List.length xs)
 117
 118 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
 119 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
 120   where
 121     (mpa, ca) = List.splitAt a $ List.filter af xs
 122     (mpb, cb) = List.splitAt b $ List.filter bf xs
 123