]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[REFACT] before scoring new ngrams lists.
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE BangPatterns #-}
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
17
18 module Gargantext.Text.Metrics
19 where
20
21 --import Data.Array.Accelerate ((:.)(..), Z(..))
22 --import Math.KMeans (kmeans, euclidSq, elements)
23
24 --import GHC.Float (exp)
25
26 import Data.Map (Map)
27 import Data.List.Extra (sortOn)
28 import GHC.Real (round)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Distances.Matrice
31 import Gargantext.Viz.Graph.Index
32 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
33 import qualified Data.Array.Accelerate as DAA
34 import qualified Data.Array.Accelerate.Interpreter as DAA
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37
38 import qualified Data.Vector.Storable as Vec
39
40 type GraphListSize = Int
41 type InclusionSize = Int
42
43 toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
44 toScored = map2scored
45 . (pcaReduceTo (Dimension 2))
46 . (Map.filter (\v -> Vec.length v > 1))
47 . (Map.unionsWith (<>))
48
49
50 scored :: Ord t => Map (t,t) Int -> [Scored t]
51 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
52
53 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
54 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
55
56 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
57 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
58
59 -- TODO change type with (x,y)
60 data Scored ts = Scored
61 { _scored_terms :: !ts
62 , _scored_incExc :: !InclusionExclusion
63 , _scored_speGen :: !SpecificityGenericity
64 } deriving (Show)
65
66 localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
67 localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
68 (Map.toList fi)
69 scores
70 where
71 (ti, fi) = createIndices m
72 (is, ss) = incExcSpeGen $ cooc2mat ti m
73 scores = DAA.toList
74 $ DAA.run
75 $ DAA.zip (DAA.use is) (DAA.use ss)
76
77
78 -- TODO Code to be remove below
79 -- TODO in the textflow we end up needing these indices , it might be
80 -- better to compute them earlier and pass them around.
81 scored' :: Ord t => Map (t,t) Int -> [Scored t]
82 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
83 where
84 (ti, fi) = createIndices m
85 (is, ss) = incExcSpeGen $ cooc2mat ti m
86 scores = DAA.toList
87 $ DAA.run
88 $ DAA.zip (DAA.use is) (DAA.use ss)
89
90
91 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
92 takeScored listSize incSize = map _scored_terms
93 . linearTakes listSize incSize _scored_speGen
94 _scored_incExc
95 . scored
96
97
98 -- | Filter Scored data
99 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
100 -- [(3,8),(6,5)]
101 linearTakes :: (Ord b1, Ord b2)
102 => GraphListSize -> InclusionSize
103 -> (a -> b2) -> (a -> b1) -> [a] -> [a]
104 linearTakes gls incSize speGen incExc = take gls
105 . List.concat
106 . map (take $ round
107 $ (fromIntegral gls :: Double)
108 / (fromIntegral incSize :: Double)
109 )
110 . map (sortOn incExc)
111 . splitEvery incSize
112 . sortOn speGen
113