2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly reexport functions in @Data.Text.Metrics@
14 {-# LANGUAGE BangPatterns #-}
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
18 module Gargantext.Text.Metrics
21 --import Data.Array.Accelerate ((:.)(..), Z(..))
22 --import Math.KMeans (kmeans, euclidSq, elements)
24 --import GHC.Float (exp)
27 import Data.List.Extra (sortOn)
28 import GHC.Real (round)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Distances.Matrice
31 import Gargantext.Viz.Graph.Index
32 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
33 import qualified Data.Array.Accelerate as DAA
34 import qualified Data.Array.Accelerate.Interpreter as DAA
35 import qualified Data.List as List
36 import qualified Data.Map as Map
38 import qualified Data.Vector.Storable as Vec
40 type GraphListSize = Int
41 type InclusionSize = Int
43 toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
45 . (pcaReduceTo (Dimension 2))
46 . (Map.filter (\v -> Vec.length v > 1))
47 . (Map.unionsWith (<>))
50 scored :: Ord t => Map (t,t) Int -> [Scored t]
51 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
53 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
54 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
56 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
57 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
59 -- TODO change type with (x,y)
60 data Scored ts = Scored
61 { _scored_terms :: !ts
62 , _scored_incExc :: !InclusionExclusion
63 , _scored_speGen :: !SpecificityGenericity
66 localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
67 localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
71 (ti, fi) = createIndices m
72 (is, ss) = incExcSpeGen $ cooc2mat ti m
75 $ DAA.zip (DAA.use is) (DAA.use ss)
78 -- TODO Code to be remove below
79 -- TODO in the textflow we end up needing these indices , it might be
80 -- better to compute them earlier and pass them around.
81 scored' :: Ord t => Map (t,t) Int -> [Scored t]
82 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
84 (ti, fi) = createIndices m
85 (is, ss) = incExcSpeGen $ cooc2mat ti m
88 $ DAA.zip (DAA.use is) (DAA.use ss)
91 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
92 takeScored listSize incSize = map _scored_terms
93 . linearTakes listSize incSize _scored_speGen
98 -- | Filter Scored data
99 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
101 linearTakes :: (Ord b1, Ord b2)
102 => GraphListSize -> InclusionSize
103 -> (a -> b2) -> (a -> b1) -> [a] -> [a]
104 linearTakes gls incSize speGen incExc = take gls
107 $ (fromIntegral gls :: Double)
108 / (fromIntegral incSize :: Double)
110 . map (sortOn incExc)