2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly reexport functions in @Data.Text.Metrics@
14 {-# LANGUAGE BangPatterns #-}
16 module Gargantext.Core.Text.Metrics
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
22 --import GHC.Float (exp)
23 import Data.Tuple.Extra (both)
25 import Data.List.Extra (sortOn)
26 import Gargantext.Prelude
27 import Gargantext.Core.Viz.Graph.Distances.Matrice
28 import Gargantext.Core.Viz.Graph.Index
29 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
30 import qualified Data.Array.Accelerate as DAA
31 import qualified Data.Array.Accelerate.Interpreter as DAA
32 import qualified Data.List as List
33 import qualified Data.Map as Map
35 import qualified Data.Vector.Storable as Vec
37 type MapListSize = Int
38 type InclusionSize = Int
41 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
42 toScored' = map2scored
43 . (pcaReduceTo (Dimension 2))
44 . (Map.filter (\v -> Vec.length v > 1))
45 . (Map.unionsWith (<>))
48 scored :: Ord t => Map (t,t) Int -> [Scored t]
49 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
51 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
52 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
54 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
55 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
57 -- TODO change type with (x,y)
58 data Scored ts = Scored
59 { _scored_terms :: !ts
60 , _scored_incExc :: !InclusionExclusion
61 , _scored_speGen :: !SpecificityGenericity
64 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
65 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
69 (ti, fi) = createIndices m
70 (is, ss) = incExcSpeGen $ cooc2mat ti m
73 $ DAA.zip (DAA.use is) (DAA.use ss)
75 -- TODO Code to be removed below
76 -- TODO in the textflow we end up needing these indices , it might be
77 -- better to compute them earlier and pass them around.
78 scored' :: Ord t => Map (t,t) Int -> [Scored t]
79 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
81 (ti, fi) = createIndices m
82 (is, ss) = incExcSpeGen $ cooc2mat ti m
85 $ DAA.zip (DAA.use is) (DAA.use ss)
88 takeScored :: Ord t => MapListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
89 takeScored listSize incSize = both (map _scored_terms)
90 . takeLinear listSize incSize _scored_speGen
95 -- | Filter Scored data
96 -- >>> takeLinear 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
98 takeLinear :: (Ord b1, Ord b2)
99 => MapListSize -> InclusionSize
100 -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
101 takeLinear mls incSize speGen incExc = (List.splitAt mls)
104 $ (fromIntegral mls :: Double)
105 / (fromIntegral incSize :: Double)
107 . map (sortOn speGen)
113 takePercent :: Double -> [a] -> [a]
114 takePercent l xs = List.take l' xs
116 l' = round $ l * (fromIntegral $ List.length xs)
118 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
119 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
121 (mpa, ca) = List.splitAt a $ List.filter af xs
122 (mpb, cb) = List.splitAt b $ List.filter bf xs