]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
[ngrams] add score update endpoint + sorting
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE TemplateHaskell #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21 import Control.Lens (makeLenses)
22 import Data.Map (Map)
23 import Data.Monoid (Monoid, mempty)
24 import Data.HashMap.Strict (HashMap)
25 import Data.Semigroup (Semigroup)
26 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
27 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
28 import Gargantext.Core.Viz.Graph.Index
29 import Gargantext.Prelude
30 import qualified Data.Array.Accelerate as DAA
31 import qualified Data.Array.Accelerate.Interpreter as DAA
32 import qualified Data.Map as Map
33 import qualified Data.Vector as V
34 import qualified Data.Vector.Storable as Vec
35 import qualified Data.HashMap.Strict as HashMap
36
37
38 type MapListSize = Int
39 type InclusionSize = Int
40
41 scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
42 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map . Map.fromList . HashMap.toList
43 where
44 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
45 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
46
47 map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
48 map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
49
50 -- TODO change type with (x,y)
51 data Scored ts = Scored
52 { _scored_terms :: !ts
53 , _scored_genInc :: !GenericityInclusion
54 , _scored_speExc :: !SpecificityExclusion
55 } deriving (Show, Eq, Ord)
56
57 instance Monoid a => Monoid (Scored a) where
58 mempty = Scored mempty mempty mempty
59
60 instance Semigroup a => Semigroup (Scored a) where
61 (<>) (Scored a b c )
62 (Scored _a' b' c')
63 = Scored (a {-<> a'-})
64 (b <> b')
65 (c <> c')
66
67 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
68 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
69 (Map.toList fi)
70 scores
71 where
72 (ti, fi) = createIndices m
73 (is, ss) = incExcSpeGen $ cooc2mat ti m
74 scores = DAA.toList
75 $ DAA.run
76 $ DAA.zip (DAA.use is) (DAA.use ss)
77
78 -- TODO Code to be removed below
79 -- TODO in the textflow we end up needing these indices , it might be
80 -- better to compute them earlier and pass them around.
81 scored' :: Ord t => Map (t,t) Int -> [Scored t]
82 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
83 where
84 (ti, fi) = createIndices m
85 (is, ss) = incExcSpeGen $ cooc2mat ti m
86 scores = DAA.toList
87 $ DAA.run
88 $ DAA.zip (DAA.use is) (DAA.use ss)
89
90
91 normalizeGlobal :: [Scored a] -> [Scored a]
92 normalizeGlobal ss = map (\(Scored t s1 s2)
93 -> Scored t ((s1 - s1min) / s1max)
94 ((s2 - s2min) / s2max)) ss
95 where
96 ss1 = map _scored_genInc ss
97 ss2 = map _scored_speExc ss
98
99 s1min = minimum ss1
100 s1max = maximum ss1
101
102 s2min = minimum ss2
103 s2max = maximum ss2
104
105
106
107 normalizeLocal :: Scored a -> Scored a
108 normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
109 where
110 log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
111
112
113
114 -- | Type Instances
115 makeLenses 'Scored