]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
Continue refactoring...
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE TemplateHaskell #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21 import Control.Lens (makeLenses)
22 import Data.Map (Map)
23 import Data.Semigroup (Semigroup)
24 import Data.Monoid (Monoid, mempty)
25 import Gargantext.Prelude
26 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
27 import Gargantext.Core.Viz.Graph.Index
28 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
29 import qualified Data.Array.Accelerate as DAA
30 import qualified Data.Array.Accelerate.Interpreter as DAA
31 import qualified Data.Map as Map
32
33 import qualified Data.Vector as V
34 import qualified Data.Vector.Storable as Vec
35
36 type MapListSize = Int
37 type InclusionSize = Int
38
39 scored :: Ord t => Map (t,t) Int -> V.Vector (Scored t)
40 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
41 where
42 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
43 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
44
45 map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
46 map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
47
48 -- TODO change type with (x,y)
49 data Scored ts = Scored
50 { _scored_terms :: !ts
51 , _scored_genInc :: !GenericityInclusion
52 , _scored_speExc :: !SpecificityExclusion
53 } deriving (Show, Eq, Ord)
54
55 instance Monoid a => Monoid (Scored a) where
56 mempty = Scored mempty mempty mempty
57
58 instance Semigroup a => Semigroup (Scored a) where
59 (<>) (Scored a b c )
60 (Scored _a' b' c')
61 = Scored (a {-<> a'-})
62 (b <> b')
63 (c <> c')
64
65 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
66 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
67 (Map.toList fi)
68 scores
69 where
70 (ti, fi) = createIndices m
71 (is, ss) = incExcSpeGen $ cooc2mat ti m
72 scores = DAA.toList
73 $ DAA.run
74 $ DAA.zip (DAA.use is) (DAA.use ss)
75
76 -- TODO Code to be removed below
77 -- TODO in the textflow we end up needing these indices , it might be
78 -- better to compute them earlier and pass them around.
79 scored' :: Ord t => Map (t,t) Int -> [Scored t]
80 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
81 where
82 (ti, fi) = createIndices m
83 (is, ss) = incExcSpeGen $ cooc2mat ti m
84 scores = DAA.toList
85 $ DAA.run
86 $ DAA.zip (DAA.use is) (DAA.use ss)
87
88
89 normalizeGlobal :: [Scored a] -> [Scored a]
90 normalizeGlobal ss = map (\(Scored t s1 s2)
91 -> Scored t ((s1 - s1min) / s1max)
92 ((s2 - s2min) / s2max)) ss
93 where
94 ss1 = map _scored_genInc ss
95 ss2 = map _scored_speExc ss
96
97 s1min = minimum ss1
98 s1max = maximum ss1
99
100 s2min = minimum ss2
101 s2max = maximum ss2
102
103
104
105 normalizeLocal :: Scored a -> Scored a
106 normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
107 where
108 log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
109
110
111
112 -- | Type Instances
113 makeLenses 'Scored