]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
[CLEAN] smal refact, renaming, doc
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE TemplateHaskell #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21 import Control.Lens (makeLenses)
22 import Data.Map (Map)
23 import Gargantext.Prelude
24 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
25 import Gargantext.Core.Viz.Graph.Index
26 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
27 import qualified Data.Array.Accelerate as DAA
28 import qualified Data.Array.Accelerate.Interpreter as DAA
29 import qualified Data.Map as Map
30
31 import qualified Data.Vector.Storable as Vec
32
33 type MapListSize = Int
34 type InclusionSize = Int
35
36 scored :: Ord t => Map (t,t) Int -> [Scored t]
37 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
38 where
39 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
40 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
41
42 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
43 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
44
45 -- TODO change type with (x,y)
46 data Scored ts = Scored
47 { _scored_terms :: !ts
48 , _scored_genInc :: !GenericityInclusion
49 , _scored_speExc :: !SpecificityExclusion
50 } deriving (Show, Eq, Ord)
51
52
53 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
54 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
55 (Map.toList fi)
56 scores
57 where
58 (ti, fi) = createIndices m
59 (is, ss) = incExcSpeGen $ cooc2mat ti m
60 scores = DAA.toList
61 $ DAA.run
62 $ DAA.zip (DAA.use is) (DAA.use ss)
63
64 -- TODO Code to be removed below
65 -- TODO in the textflow we end up needing these indices , it might be
66 -- better to compute them earlier and pass them around.
67 scored' :: Ord t => Map (t,t) Int -> [Scored t]
68 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
69 where
70 (ti, fi) = createIndices m
71 (is, ss) = incExcSpeGen $ cooc2mat ti m
72 scores = DAA.toList
73 $ DAA.run
74 $ DAA.zip (DAA.use is) (DAA.use ss)
75
76
77 normalizeGlobal :: [Scored a] -> [Scored a]
78 normalizeGlobal ss = map (\(Scored t s1 s2)
79 -> Scored t ((s1 - s1min) / s1max)
80 ((s2 - s2min) / s2max)) ss
81 where
82 ss1 = map _scored_genInc ss
83 ss2 = map _scored_speExc ss
84
85 s1min = minimum ss1
86 s1max = maximum ss1
87
88 s2min = minimum ss2
89 s2max = maximum ss2
90
91
92
93 normalizeLocal :: Scored a -> Scored a
94 normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
95 where
96 log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
97
98
99
100 -- | Type Instances
101 makeLenses 'Scored