]> Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/Prelude.hs
[FEAT] Ngrams metrics, thanks to text-metrics to begin with.
[gargantext.git] / src / Data / Gargantext / Prelude.hs
1 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
2 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
3 {-# LANGUAGE NoImplicitPrelude #-}
4
5 module Data.Gargantext.Prelude where
6
7 import Protolude (Bool(True, False), Int, Double, Integer, Fractional, Num, Maybe, Floating, Char, Ord, Integral, Foldable, RealFrac, Monad, filter,
8 reverse
9 , map
10 , zip
11 , drop
12 , take
13 , zipWith
14 , sum
15 , fromIntegral
16 , length
17 , fmap
18 , takeWhile
19 , sqrt
20 , undefined
21 , identity
22 , abs
23 , maximum
24 , minimum
25 , return
26 , snd
27 , truncate
28 , (+), (*), (/), (-), (.), (>=), ($), (**), (^)
29 )
30
31 -- TODO import functions optimized in Utils.Count
32 -- import Protolude hiding (head, last, all, any, sum, product, length)
33 -- import Data.Gargantext.Utils.Count
34
35 import qualified Data.List as L hiding (head, sum)
36 import qualified Control.Monad as M
37 import qualified Data.Map as Map
38 import qualified Data.Vector as V
39
40 pf :: (a -> Bool) -> [a] -> [a]
41 pf = filter
42
43 pr :: [a] -> [a]
44 pr = reverse
45
46 pm :: (a -> b) -> [a] -> [b]
47 pm = map
48
49 pm2 :: (t -> b) -> [[t]] -> [[b]]
50 pm2 fun = pm (pm fun)
51
52 pz :: [a] -> [b] -> [(a, b)]
53 pz = zip
54
55 pd :: Int -> [a] -> [a]
56 pd = drop
57
58 ptk :: Int -> [a] -> [a]
59 ptk = take
60
61 pzw :: (a -> b -> c) -> [a] -> [b] -> [c]
62 pzw = zipWith
63
64 -- Exponential Average
65 eavg :: [Double] -> Double
66 eavg (x:xs) = a*x + (1-a)*(eavg xs)
67 where a = 0.70
68 eavg [] = 0
69
70 -- Simple Average
71 mean :: Fractional a => [a] -> a
72 mean xs = if L.null xs then 0.0
73 else sum xs / fromIntegral (length xs)
74
75 sumMaybe :: Num a => [Maybe a] -> Maybe a
76 sumMaybe = fmap sum . M.sequence
77
78 variance :: Floating a => [a] -> a
79 variance xs = mean $ pm (\x -> (x - m) ** 2) xs where
80 m = mean xs
81
82 deviation :: [Double] -> Double
83 deviation = sqrt . variance
84
85 movingAverage :: Fractional b => Int -> [b] -> [b]
86 movingAverage steps xs = pm mean $ chunkAlong steps 1 xs
87
88 ma :: [Double] -> [Double]
89 ma = movingAverage 3
90
91
92 -- | Function to split a range into chunks
93 chunkAlong :: Int -> Int -> [a] -> [[a]]
94 chunkAlong a b l = only (while dropAlong)
95 where
96 only = pm (take a)
97 while = takeWhile (\x -> length x >= a)
98 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
99
100 -- | Optimized version (Vector)
101 chunkAlong' :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
102 chunkAlong' a b l = only (while dropAlong)
103 where
104 only = V.map (V.take a)
105 while = V.takeWhile (\x -> V.length x >= a)
106 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
107
108 -- | TODO Inverse of chunk ? unchunkAlong ?
109 unchunkAlong :: Int -> Int -> [[a]] -> [a]
110 unchunkAlong = undefined
111
112
113 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
114 splitAlong :: [Int] -> [Char] -> [[Char]]
115 splitAlong _ [] = [] -- No list? done
116 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
117 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys) -- take until our split spot, recurse with next split spot and list remainder
118
119 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
120 takeWhileM _ [] = return []
121 takeWhileM p (a:as) = do
122 v <- a
123 if p v
124 then do
125 vs <- takeWhileM p as
126 return (v:vs)
127 else return []
128
129 -- SUMS
130 -- To select the right algorithme according to the type:
131 -- https://github.com/mikeizbicki/ifcxt
132
133 sumSimple :: Num a => [a] -> a
134 sumSimple = L.foldl' (+) 0
135
136 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
137 sumKahan :: Num a => [a] -> a
138 sumKahan = snd . L.foldl' go (0,0)
139 where
140 go (c,t) i = ((t'-t)-y,t')
141 where
142 y = i-c
143 t' = t+y
144
145 -- | compute part of the dict
146 count2map :: (Ord k, Foldable t) => t k -> Map.Map k Double
147 count2map xs = Map.map (/ (fromIntegral (length xs))) (count2map' xs)
148
149 -- | insert in a dict
150 count2map' :: (Ord k, Foldable t) => t k -> Map.Map k Double
151 count2map' xs = L.foldl' (\x y -> Map.insertWith' (+) y 1 x) Map.empty xs
152
153
154 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
155 trunc n = truncate . (* 10^n)
156
157 trunc' :: Int -> Double -> Double
158 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
159
160
161 bool2int :: Num a => Bool -> a
162 bool2int b = case b of
163 True -> 1
164 False -> 0
165
166 bool2double :: Bool -> Double
167 bool2double bool = case bool of
168 True -> 1.0
169 False -> 0.0
170
171
172
173 -- Normalizing && scaling data
174 scale :: [Double] -> [Double]
175 scale = scaleMinMax
176
177 scaleMinMax :: [Double] -> [Double]
178 scaleMinMax xs = pm (\x -> (x - mi / (ma - mi + 1) )) xs'
179 where
180 ma = maximum xs'
181 mi = minimum xs'
182 xs' = pm abs xs
183
184 scaleNormalize :: [Double] -> [Double]
185 scaleNormalize xs = pm (\x -> (x - v / (m + 1))) xs'
186 where
187 v = variance xs'
188 m = mean xs'
189 xs' = pm abs xs
190
191
192
193 normalize :: [Double] -> [Double]
194 normalize as = normalizeWith identity as
195
196 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
197 normalizeWith extract bs = pm (\x -> x/(sum bs')) bs'
198 where
199 bs' = pm extract bs
200
201 -- Zip functions to add
202 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
203 zipFst f xs = zip (f xs) xs
204
205 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
206 zipSnd f xs = zip xs (f xs)