]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
[FIX] working for all ngrams but NgramsTerms with simple Tree (1 depth)
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE BangPatterns #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21 import Data.Map (Map)
22 import Gargantext.Prelude
23 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
24 import Gargantext.Core.Viz.Graph.Index
25 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
26 import qualified Data.Array.Accelerate as DAA
27 import qualified Data.Array.Accelerate.Interpreter as DAA
28 import qualified Data.Map as Map
29
30 import qualified Data.Vector.Storable as Vec
31
32 type MapListSize = Int
33 type InclusionSize = Int
34
35 scored :: Ord t => Map (t,t) Int -> [Scored t]
36 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
37 where
38 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
39 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
40
41 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
42 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
43
44 -- TODO change type with (x,y)
45 data Scored ts = Scored
46 { _scored_terms :: !ts
47 , _scored_genInc :: !GenericityInclusion
48 , _scored_speExc :: !SpecificityExclusion
49 } deriving (Show)
50
51
52 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
53 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
54 (Map.toList fi)
55 scores
56 where
57 (ti, fi) = createIndices m
58 (is, ss) = incExcSpeGen $ cooc2mat ti m
59 scores = DAA.toList
60 $ DAA.run
61 $ DAA.zip (DAA.use is) (DAA.use ss)
62
63 -- TODO Code to be removed below
64 -- TODO in the textflow we end up needing these indices , it might be
65 -- better to compute them earlier and pass them around.
66 scored' :: Ord t => Map (t,t) Int -> [Scored t]
67 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
68 where
69 (ti, fi) = createIndices m
70 (is, ss) = incExcSpeGen $ cooc2mat ti m
71 scores = DAA.toList
72 $ DAA.run
73 $ DAA.zip (DAA.use is) (DAA.use ss)
74
75
76 normalizeGlobal :: [Scored a] -> [Scored a]
77 normalizeGlobal ss = map (\(Scored t s1 s2)
78 -> Scored t ((s1 - s1min) / s1max)
79 ((s2 - s2min) / s2max)) ss
80 where
81 ss1 = map _scored_genInc ss
82 ss2 = map _scored_speExc ss
83
84 s1min = minimum ss1
85 s1max = maximum ss1
86
87 s2min = minimum ss2
88 s2max = maximum ss2
89
90
91
92 normalizeLocal :: Scored a -> Scored a
93 normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
94 where
95 log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
96
97
98
99
100