src/Gargantext/Text/Metrics.hs

   1 {-|
   2 Module      : Gargantext.Text.Metrics
   3 Description : All parsers of Gargantext in one file.
   4 Copyright   : (c) CNRS, 2017 - present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 Mainly reexport functions in @Data.Text.Metrics@
  11
  12 -}
  13
  14 {-# LANGUAGE BangPatterns      #-}
  15 {-# LANGUAGE NoImplicitPrelude #-}
  16 {-# LANGUAGE OverloadedStrings #-}
  17
  18 module Gargantext.Text.Metrics
  19   where
  20
  21 --import Data.Array.Accelerate ((:.)(..), Z(..))
  22 --import Math.KMeans (kmeans, euclidSq, elements)
  23
  24 --import GHC.Float (exp)
  25 import Data.Tuple.Extra (both)
  26 import Data.Map (Map)
  27 import Data.List.Extra (sortOn)
  28 import GHC.Real (round)
  29 import Gargantext.Prelude
  30 import Gargantext.Viz.Graph.Distances.Matrice
  31 import Gargantext.Viz.Graph.Index
  32 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
  33 import qualified Data.Array.Accelerate as DAA
  34 import qualified Data.Array.Accelerate.Interpreter as DAA
  35 import qualified Data.List as List
  36 import qualified Data.Map  as Map
  37
  38 import qualified Data.Vector.Storable as Vec
  39
  40 type GraphListSize = Int
  41 type InclusionSize = Int
  42
  43 {-
  44 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
  45 toScored' = map2scored
  46          . (pcaReduceTo (Dimension 2))
  47          . (Map.filter (\v -> Vec.length v > 1))
  48          . (Map.unionsWith (<>))
  49 -}
  50
  51 scored :: Ord t => Map (t,t) Int -> [Scored t]
  52 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
  53   where
  54     scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
  55     scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
  56
  57     map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
  58     map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
  59
  60 -- TODO change type with (x,y)
  61 data Scored ts = Scored
  62   { _scored_terms  :: !ts
  63   , _scored_incExc :: !InclusionExclusion
  64   , _scored_speGen :: !SpecificityGenericity
  65   } deriving (Show)
  66
  67 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
  68 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
  69                                        (Map.toList fi)
  70                                        scores
  71   where
  72     (ti, fi) = createIndices m
  73     (is, ss) = incExcSpeGen $ cooc2mat ti m
  74     scores   = DAA.toList
  75              $ DAA.run
  76              $ DAA.zip (DAA.use is) (DAA.use ss)
  77
  78
  79 -- TODO Code to be remove below
  80 -- TODO in the textflow we end up needing these indices , it might be
  81 -- better to compute them earlier and pass them around.
  82 scored' :: Ord t => Map (t,t) Int -> [Scored t]
  83 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
  84   where
  85     (ti, fi) = createIndices m
  86     (is, ss) = incExcSpeGen $ cooc2mat ti m
  87     scores   = DAA.toList
  88              $ DAA.run
  89              $ DAA.zip (DAA.use is) (DAA.use ss)
  90
  91
  92 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
  93 takeScored listSize incSize = both (map _scored_terms)
  94                             . linearTakes listSize incSize _scored_speGen
  95                                                            _scored_incExc
  96                             . scored
  97
  98
  99 -- | Filter Scored data
 100 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
 101 -- [(3,8),(6,5)]
 102 linearTakes :: (Ord b1, Ord b2)
 103             => GraphListSize -> InclusionSize
 104             -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
 105 linearTakes mls incSize speGen incExc = (List.splitAt mls)
 106                       . List.concat
 107                       . map (take $ round
 108                                   $ (fromIntegral mls     :: Double)
 109                                   / (fromIntegral incSize :: Double)
 110                              )
 111                       . map (sortOn speGen)
 112                       . splitEvery incSize
 113                       . take 5000
 114                       . takePercent (0.70)
 115                       . sortOn incExc
 116
 117 takePercent :: Double -> [a] -> [a]
 118 takePercent l xs = List.take l' xs
 119   where
 120     l' = round $ l * (fromIntegral $ List.length xs)
 121
 122 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
 123 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
 124   where
 125     (mpa, ca) = List.splitAt a $ List.filter af xs
 126     (mpb, cb) = List.splitAt b $ List.filter bf xs
 127