2 Module : Gargantext.Core.Text.Metrics
3 Description : Term metrics (genericity/inclusion and specificity/exclusion scores).
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly re-exports functions from @Data.Text.Metrics@
14 {-# LANGUAGE TemplateHaskell #-}
16 module Gargantext.Core.Text.Metrics
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21 import Control.Lens (makeLenses)
23 import Data.Monoid (Monoid, mempty)
24 import Data.HashMap.Strict (HashMap)
25 import Data.Semigroup (Semigroup)
26 import Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
27 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
28 import Gargantext.Core.Viz.Graph.Index
29 import Gargantext.Prelude
30 import qualified Data.Array.Accelerate as DAA
31 import qualified Data.Array.Accelerate.Interpreter as DAA
32 import qualified Data.Map as Map
33 import qualified Data.Vector as V
34 import qualified Data.Vector.Storable as Vec
35 import qualified Data.HashMap.Strict as HashMap
-- | Plain alias for 'Int'.
-- NOTE(review): presumably the size of a map list; no use is visible in this excerpt — confirm against callers.
38 type MapListSize = Int
-- | Plain alias for 'Int'.
-- NOTE(review): presumably the size of an inclusion selection; no use is visible in this excerpt — confirm against callers.
39 type InclusionSize = Int
-- | Score every term of a cooccurrence map.
--
-- The 'HashMap' is first converted to an ordered 'Map', scored per term with
-- 'scored2map', passed through @'pcaReduceTo' ('Dimension' 2)@ and finally
-- repacked as a vector of 'Scored' values by 'map2scored'.
scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
scored cooc = map2scored reduced
  where
    -- Ordered view of the input cooccurrences.
    ordered = Map.fromList (HashMap.toList cooc)
    -- Per-term score vectors after the two-dimensional reduction.
    reduced = pcaReduceTo (Dimension 2) (scored2map ordered)
-- | Index the scores produced by @scored'@ by term.
--
-- Every term is mapped to a two-element vector: its genericity/inclusion
-- score followed by its specificity/exclusion score. Should @scored'@ yield
-- the same term more than once, the last entry wins ('Map.fromList').
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m =
  Map.fromList
    [ (term, Vec.fromList [genInc, speExc])
    | Scored term genInc speExc <- scored' m
    ]
-- | Turn a map of per-term score vectors back into a vector of 'Scored'.
--
-- The first element of each vector becomes the genericity/inclusion score
-- and the last element the specificity/exclusion score; entries follow the
-- ascending key order of 'Map.toList'.
--
-- Every vector must be non-empty: 'Vec.head' and 'Vec.last' fail on an
-- empty vector.
map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
map2scored termScores =
  V.fromList [ toScored entry | entry <- Map.toList termScores ]
  where
    toScored (term, coords) = Scored term (Vec.head coords) (Vec.last coords)
50 -- TODO change type with (x,y)
-- | A term (or group of terms) together with its two scores.
-- All three fields are strict.
51 data Scored ts = Scored
52 { _scored_terms :: !ts -- ^ the scored term(s)
53 , _scored_genInc :: !GenericityInclusion -- ^ genericity/inclusion score
54 , _scored_speExc :: !SpecificityExclusion -- ^ specificity/exclusion score
55 } deriving (Show, Eq, Ord)
-- | The empty 'Scored' value: each field is the 'mempty' of its own type.
instance Monoid a => Monoid (Scored a) where
  mempty = Scored
    { _scored_terms  = mempty
    , _scored_genInc = mempty
    , _scored_speExc = mempty
    }
-- | 'Semigroup' instance for 'Scored'.
--
-- NOTE(review): the head of the '<>' equation and the remaining constructor
-- arguments are not visible in this excerpt. The visible part shows that the
-- combination with @a'@ is commented out for the first field, so the terms of
-- only one operand appear to be kept — TODO confirm, and check that this is
-- compatible with the identity law expected of the 'Monoid' instance.
60 instance Semigroup a => Semigroup (Scored a) where
63 = Scored (a {-<> a'-})
-- | Per-term local metrics: a map from each term to a two-element vector
-- @[inc, spe]@.
--
-- The visible bindings build the term indices with 'createIndices', convert
-- the cooccurrence map to a matrix with @cooc2mat Triangle@ and pass it to
-- 'incExcSpeGen'; its two results are then paired up with 'DAA.zip'.
--
-- NOTE(review): several lines of this definition are not visible in this
-- excerpt (the remaining 'zipWith' arguments, the @where@ keyword and the head
-- of the binding that ends with the 'DAA.zip' expression).
67 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
68 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
-- Index structures derived from the cooccurrence map.
72 (ti, fi) = createIndices m
-- The two result arrays of 'incExcSpeGen' on the cooccurrence matrix.
73 (is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
-- Pair the two result arrays element-wise.
76 $ DAA.zip (DAA.use is) (DAA.use ss)
78 -- TODO Code to be removed below
79 -- TODO In the textflow we end up needing these indices; it might be
80 -- better to compute them earlier and pass them around.
-- | Score every term of a cooccurrence map as a list of 'Scored' values.
--
-- The entries of @fi@ (from 'createIndices') are zipped with @scores@; the
-- second component of each entry is used as the term, and each score pair
-- fills the genericity/inclusion and specificity/exclusion fields, in that
-- order.
--
-- NOTE(review): the @where@ keyword and the head of the @scores@ binding
-- (the part that precedes the 'DAA.zip' expression) are not visible in this
-- excerpt.
81 scored' :: Ord t => Map (t,t) Int -> [Scored t]
82 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
-- Index structures derived from the cooccurrence map.
84 (ti, fi) = createIndices m
-- The two result arrays of 'incExcSpeGen' on the cooccurrence matrix.
85 (is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
-- Pair the two result arrays element-wise.
88 $ DAA.zip (DAA.use is) (DAA.use ss)
-- | Rescale both scores of every element using statistics of the whole list:
-- each score @s@ becomes @(s - sMin) / sMax@. The terms are left untouched.
--
-- NOTE(review): the bindings of @s1min@, @s1max@, @s2min@ and @s2max@ (and the
-- @where@ keyword) are not visible in this excerpt; presumably they are the
-- minimum and maximum of @ss1@ and @ss2@ — TODO confirm. If so, the divisor is
-- the maximum rather than the range @max - min@, and a zero maximum would
-- produce NaN or Infinity — verify that this is intended.
91 normalizeGlobal :: [Scored a] -> [Scored a]
92 normalizeGlobal ss = map (\(Scored t s1 s2)
93 -> Scored t ((s1 - s1min) / s1max)
94 ((s2 - s2min) / s2max)) ss
-- All genericity/inclusion scores of the input list.
96 ss1 = map _scored_genInc ss
-- All specificity/exclusion scores of the input list.
97 ss2 = map _scored_speExc ss
-- | Rescale the two scores of a single 'Scored' value on a logarithmic scale:
-- the first score with @log' 5@ and the second with @log' 2@. The term is
-- left untouched.
107 normalizeLocal :: Scored a -> Scored a
108 normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
-- @log' n x@ is @1 + log (10^n * x)@ for positive @x@ and @1@ when @x <= 0@,
-- so the logarithm is never taken of a non-positive number.
-- NOTE(review): the @where@ keyword that presumably introduces this helper is
-- not visible in this excerpt.
110 log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)