2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly reexport functions in @Data.Text.Metrics@
14 {-# LANGUAGE BangPatterns #-}
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
18 module Gargantext.Text.Metrics
21 --import Data.Array.Accelerate ((:.)(..), Z(..))
22 --import Math.KMeans (kmeans, euclidSq, elements)
24 --import GHC.Float (exp)
25 import Data.Tuple.Extra (both)
27 import Data.List.Extra (sortOn)
28 import GHC.Real (round)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Distances.Matrice
31 import Gargantext.Viz.Graph.Index
32 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
33 import qualified Data.Array.Accelerate as DAA
34 import qualified Data.Array.Accelerate.Interpreter as DAA
35 import qualified Data.List as List
36 import qualified Data.Map as Map
38 import qualified Data.Vector.Storable as Vec
40 type GraphListSize = Int
41 type InclusionSize = Int
44 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
45 toScored' = map2scored
46 . (pcaReduceTo (Dimension 2))
47 . (Map.filter (\v -> Vec.length v > 1))
48 . (Map.unionsWith (<>))
51 scored :: Ord t => Map (t,t) Int -> [Scored t]
52 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
54 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
55 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
57 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
58 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
60 -- TODO change type with (x,y)
61 data Scored ts = Scored
62 { _scored_terms :: !ts
63 , _scored_incExc :: !InclusionExclusion
64 , _scored_speGen :: !SpecificityGenericity
67 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
68 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
72 (ti, fi) = createIndices m
73 (is, ss) = incExcSpeGen $ cooc2mat ti m
76 $ DAA.zip (DAA.use is) (DAA.use ss)
79 -- TODO Code to be remove below
80 -- TODO in the textflow we end up needing these indices , it might be
81 -- better to compute them earlier and pass them around.
82 scored' :: Ord t => Map (t,t) Int -> [Scored t]
83 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
85 (ti, fi) = createIndices m
86 (is, ss) = incExcSpeGen $ cooc2mat ti m
89 $ DAA.zip (DAA.use is) (DAA.use ss)
92 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
93 takeScored listSize incSize = both (map _scored_terms)
94 . linearTakes listSize incSize _scored_speGen
99 -- | Filter Scored data
100 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
102 linearTakes :: (Ord b1, Ord b2)
103 => GraphListSize -> InclusionSize
104 -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
105 linearTakes gls incSize speGen incExc = (List.splitAt gls)
108 $ (fromIntegral gls :: Double)
109 / (fromIntegral incSize :: Double)
111 . map (sortOn incExc)