]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
[FEAT/FIX] Stemming -> Parent/Children -> Patch ok
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE TemplateHaskell #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21 import Control.Lens (makeLenses)
22 import Data.Map (Map)
23 import Data.Semigroup (Semigroup)
24 import Data.Monoid (Monoid, mempty)
25 import Gargantext.Prelude
26 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
27 import Gargantext.Core.Viz.Graph.Index
28 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
29 import qualified Data.Array.Accelerate as DAA
30 import qualified Data.Array.Accelerate.Interpreter as DAA
31 import qualified Data.Map as Map
32
33 import qualified Data.Vector.Storable as Vec
34
35 type MapListSize = Int
36 type InclusionSize = Int
37
38 scored :: Ord t => Map (t,t) Int -> [Scored t]
39 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
40 where
41 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
42 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
43
44 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
45 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
46
47 -- TODO change type with (x,y)
48 data Scored ts = Scored
49 { _scored_terms :: !ts
50 , _scored_genInc :: !GenericityInclusion
51 , _scored_speExc :: !SpecificityExclusion
52 } deriving (Show, Eq, Ord)
53
54 instance Monoid a => Monoid (Scored a) where
55 mempty = Scored mempty mempty mempty
56
57 instance Semigroup a => Semigroup (Scored a) where
58 (<>) (Scored a b c )
59 (Scored _a' b' c')
60 = Scored (a {-<> a'-})
61 (b <> b')
62 (c <> c')
63
64 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
65 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
66 (Map.toList fi)
67 scores
68 where
69 (ti, fi) = createIndices m
70 (is, ss) = incExcSpeGen $ cooc2mat ti m
71 scores = DAA.toList
72 $ DAA.run
73 $ DAA.zip (DAA.use is) (DAA.use ss)
74
75 -- TODO Code to be removed below
76 -- TODO in the textflow we end up needing these indices , it might be
77 -- better to compute them earlier and pass them around.
78 scored' :: Ord t => Map (t,t) Int -> [Scored t]
79 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
80 where
81 (ti, fi) = createIndices m
82 (is, ss) = incExcSpeGen $ cooc2mat ti m
83 scores = DAA.toList
84 $ DAA.run
85 $ DAA.zip (DAA.use is) (DAA.use ss)
86
87
88 normalizeGlobal :: [Scored a] -> [Scored a]
89 normalizeGlobal ss = map (\(Scored t s1 s2)
90 -> Scored t ((s1 - s1min) / s1max)
91 ((s2 - s2min) / s2max)) ss
92 where
93 ss1 = map _scored_genInc ss
94 ss2 = map _scored_speExc ss
95
96 s1min = minimum ss1
97 s1max = maximum ss1
98
99 s2min = minimum ss2
100 s2max = maximum ss2
101
102
103
104 normalizeLocal :: Scored a -> Scored a
105 normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
106 where
107 log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
108
109
110
111 -- | Type Instances
112 makeLenses 'Scored