2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly reexport functions in @Data.Text.Metrics@
14 {-# LANGUAGE BangPatterns #-}
16 module Gargantext.Core.Text.Metrics
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
22 --import GHC.Float (exp)
23 import Data.Tuple.Extra (both)
25 import Data.List.Extra (sortOn)
26 import GHC.Real (round)
27 import Gargantext.Prelude
28 import Gargantext.Core.Viz.Graph.Distances.Matrice
29 import Gargantext.Core.Viz.Graph.Index
30 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
31 import qualified Data.Array.Accelerate as DAA
32 import qualified Data.Array.Accelerate.Interpreter as DAA
33 import qualified Data.List as List
34 import qualified Data.Map as Map
36 import qualified Data.Vector.Storable as Vec
38 type GraphListSize = Int
39 type InclusionSize = Int
42 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
43 toScored' = map2scored
44 . (pcaReduceTo (Dimension 2))
45 . (Map.filter (\v -> Vec.length v > 1))
46 . (Map.unionsWith (<>))
49 scored :: Ord t => Map (t,t) Int -> [Scored t]
50 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
52 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
53 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
55 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
56 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
58 -- TODO change type with (x,y)
59 data Scored ts = Scored
60 { _scored_terms :: !ts
61 , _scored_incExc :: !InclusionExclusion
62 , _scored_speGen :: !SpecificityGenericity
65 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
66 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
70 (ti, fi) = createIndices m
71 (is, ss) = incExcSpeGen $ cooc2mat ti m
74 $ DAA.zip (DAA.use is) (DAA.use ss)
77 -- TODO Code to be remove below
78 -- TODO in the textflow we end up needing these indices , it might be
79 -- better to compute them earlier and pass them around.
80 scored' :: Ord t => Map (t,t) Int -> [Scored t]
81 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
83 (ti, fi) = createIndices m
84 (is, ss) = incExcSpeGen $ cooc2mat ti m
87 $ DAA.zip (DAA.use is) (DAA.use ss)
90 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
91 takeScored listSize incSize = both (map _scored_terms)
92 . linearTakes listSize incSize _scored_speGen
97 -- | Filter Scored data
98 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
100 linearTakes :: (Ord b1, Ord b2)
101 => GraphListSize -> InclusionSize
102 -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
103 linearTakes mls incSize speGen incExc = (List.splitAt mls)
106 $ (fromIntegral mls :: Double)
107 / (fromIntegral incSize :: Double)
109 . map (sortOn speGen)
115 takePercent :: Double -> [a] -> [a]
116 takePercent l xs = List.take l' xs
118 l' = round $ l * (fromIntegral $ List.length xs)
120 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
121 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
123 (mpa, ca) = List.splitAt a $ List.filter af xs
124 (mpb, cb) = List.splitAt b $ List.filter bf xs