]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
[TextFlow] SpeGen scores (WIP)
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE BangPatterns #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21
22 --import GHC.Float (exp)
23 import Data.Tuple.Extra (both)
24 import Data.Map (Map)
25 import Data.List.Extra (sortOn)
26 import Gargantext.Prelude
27 import Gargantext.Core.Viz.Graph.Distances.Matrice
28 import Gargantext.Core.Viz.Graph.Index
29 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
30 import qualified Data.Array.Accelerate as DAA
31 import qualified Data.Array.Accelerate.Interpreter as DAA
32 import qualified Data.List as List
33 import qualified Data.Map as Map
34
35 import qualified Data.Vector.Storable as Vec
36
37 type MapListSize = Int
38 type InclusionSize = Int
39
40 {-
41 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
42 toScored' = map2scored
43 . (pcaReduceTo (Dimension 2))
44 . (Map.filter (\v -> Vec.length v > 1))
45 . (Map.unionsWith (<>))
46 -}
47
48 scored :: Ord t => Map (t,t) Int -> [Scored t]
49 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
50 where
51 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
52 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
53
54 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
55 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
56
57 -- TODO change type with (x,y)
58 data Scored ts = Scored
59 { _scored_terms :: !ts
60 , _scored_genInc :: !GenericityInclusion
61 , _scored_speExc :: !SpecificityExclusion
62 } deriving (Show)
63
64 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
65 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
66 (Map.toList fi)
67 scores
68 where
69 (ti, fi) = createIndices m
70 (is, ss) = incExcSpeGen $ cooc2mat ti m
71 scores = DAA.toList
72 $ DAA.run
73 $ DAA.zip (DAA.use is) (DAA.use ss)
74
75 -- TODO Code to be removed below
76 -- TODO in the textflow we end up needing these indices , it might be
77 -- better to compute them earlier and pass them around.
78 scored' :: Ord t => Map (t,t) Int -> [Scored t]
79 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
80 where
81 (ti, fi) = createIndices m
82 (is, ss) = incExcSpeGen $ cooc2mat ti m
83 scores = DAA.toList
84 $ DAA.run
85 $ DAA.zip (DAA.use is) (DAA.use ss)
86
87
88 takeScored :: Ord t => MapListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
89 takeScored listSize incSize = both (map _scored_terms)
90 . takeLinear listSize incSize _scored_genInc
91 _scored_speExc
92 . scored
93
94
95 -- | Filter Scored data
96 -- >>> takeLinear 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
97 -- [(3,8),(6,5)]
98 takeLinear :: (Ord b1, Ord b2)
99 => MapListSize -> InclusionSize
100 -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
101 takeLinear mls incSize speGen incExc = (List.splitAt mls)
102 . List.concat
103 . map (take $ round
104 $ (fromIntegral mls :: Double)
105 / (fromIntegral incSize :: Double)
106 )
107 . map (sortOn speGen)
108 . splitEvery incSize
109 . take 5000
110 . takePercent (0.70)
111 . sortOn incExc
112
113 takePercent :: Double -> [a] -> [a]
114 takePercent l xs = List.take l' xs
115 where
116 l' = round $ l * (fromIntegral $ List.length xs)
117
118 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
119 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
120 where
121 (mpa, ca) = List.splitAt a $ List.filter af xs
122 (mpb, cb) = List.splitAt b $ List.filter bf xs
123