]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[FIX] memory leak, useable ngrams table version (WIP)
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14
15 module Gargantext.Prelude
16 ( module Gargantext.Prelude
17 , module Protolude
18 , headMay, lastMay
19 , module GHC.Err.Located
20 , module Text.Show
21 , module Text.Read
22 , cs
23 , module Data.Maybe
24 , round
25 , sortWith
26 , module Prelude
27 , MonadBase(..)
28 , Typeable
29 )
30 where
31
32 import Control.Monad.Base (MonadBase(..))
33 import GHC.Exts (sortWith)
34 import GHC.Err.Located (undefined)
35 import GHC.Real (round)
36 import Data.Map (Map, lookup)
37 import Data.Maybe (isJust, fromJust, maybe)
38 import Data.Text (Text)
39 import Data.Typeable (Typeable)
40 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
41 , Fractional, Num, Maybe(Just,Nothing)
42 , Enum, Bounded, Float
43 , Floating, Char, IO
44 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
45 , head, flip
46 , Ord, Integral, Foldable, RealFrac, Monad, filter
47 , reverse, map, mapM, zip, drop, take, zipWith
48 , sum, fromIntegral, length, fmap, foldl, foldl'
49 , takeWhile, sqrt, identity
50 , abs, min, max, maximum, minimum, return, snd, truncate
51 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
52 , Eq, (==), (>=), (<=), (<>), (/=)
53 , (&&), (||), not, any, all
54 , concatMap
55 , fst, snd, toS
56 , elem, die, mod, div, const, either
57 , curry, uncurry, repeat
58 , otherwise, when
59 , IO()
60 , compare
61 , on
62 , panic
63 , seq
64 )
65
66 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
67 -- TODO import functions optimized in Utils.Count
68 -- import Protolude hiding (head, last, all, any, sum, product, length)
69 -- import Gargantext.Utils.Count
70 import qualified Data.List as L hiding (head, sum)
71 import qualified Control.Monad as M
72 import qualified Data.Map as M
73 import Data.Map.Strict (insertWith)
74 import qualified Data.Vector as V
75 import Safe (headMay, lastMay, initMay, tailMay)
76 import Text.Show (Show(), show)
77 import Text.Read (Read())
78 import Data.String.Conversions (cs)
79
80
81 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
82 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
83 -- printDebug _ _ = pure ()
84
85
86 -- | splitEvery n == chunkAlong n n
87 splitEvery :: Int -> [a] -> [[a]]
88 splitEvery _ [] = []
89 splitEvery n xs =
90 let (h,t) = L.splitAt n xs
91 in h : splitEvery n t
92
93 type Grain = Int
94 type Step = Int
95
96 -- | Function to split a range into chunks
97 -- if step == grain then linearity (splitEvery)
98 -- elif step < grain then overlapping
99 -- else dotted with holes
100 -- TODO FIX BUG if Steps*Grain /= length l
101 -- chunkAlong 10 10 [1..15] == [1..10]
102 -- BUG: what about the rest of (divMod 15 10)?
103 -- TODO: chunkAlongNoRest or chunkAlongWithRest
104 -- default behavior: NoRest
105
106 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
107 chunkAlong a b l = case a >= length l of
108 True -> [l]
109 False -> chunkAlong' a b l
110
111 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
112 chunkAlong' a b l = case a > 0 && b > 0 of
113 True -> chunkAlong'' a b l
114 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
115
116 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
117 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
118 where
119 only = map (take a)
120 while = takeWhile (\x -> length x >= a)
121 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
122
123 -- | Optimized version (Vector)
124 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
125 chunkAlongV a b l = only (while dropAlong)
126 where
127 only = V.map (V.take a)
128 while = V.takeWhile (\x -> V.length x >= a)
129 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
130
131 -- | TODO Inverse of chunk ? unchunkAlong ?
132 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
133 -- unchunkAlong = undefined
134
135
136 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
137 splitAlong :: [Int] -> [Char] -> [[Char]]
138 splitAlong _ [] = [] -- No list? done
139 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
140 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
141 -- take until our split spot, recurse with next split spot and list remainder
142
143 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
144 takeWhileM _ [] = return []
145 takeWhileM p (a:as) = do
146 v <- a
147 if p v
148 then do
149 vs <- takeWhileM p as
150 return (v:vs)
151 else return []
152
153 -- SUMS
154 -- To select the right algorithme according to the type:
155 -- https://github.com/mikeizbicki/ifcxt
156
157 sumSimple :: Num a => [a] -> a
158 sumSimple = L.foldl' (+) 0
159
160 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
161 sumKahan :: Num a => [a] -> a
162 sumKahan = snd . L.foldl' go (0,0)
163 where
164 go (c,t) i = ((t'-t)-y,t')
165 where
166 y = i-c
167 t' = t+y
168
169 -- | compute part of the dict
170 count2map :: (Ord k, Foldable t) => t k -> Map k Double
171 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
172
173 -- | insert in a dict
174 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
175 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
176
177 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
178 trunc n = truncate . (* 10^n)
179
180 trunc' :: Int -> Double -> Double
181 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
182
183 ------------------------------------------------------------------------
184 bool2num :: Num a => Bool -> a
185 bool2num True = 1
186 bool2num False = 0
187
188 bool2double :: Bool -> Double
189 bool2double = bool2num
190
191 bool2int :: Bool -> Int
192 bool2int = bool2num
193 ------------------------------------------------------------------------
194
195 -- Normalizing && scaling data
196 scale :: [Double] -> [Double]
197 scale = scaleMinMax
198
199 scaleMinMax :: [Double] -> [Double]
200 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
201 where
202 ma = maximum xs'
203 mi = minimum xs'
204 xs' = map abs xs
205
206 scaleNormalize :: [Double] -> [Double]
207 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
208 where
209 v = variance xs'
210 m = mean xs'
211 xs' = map abs xs
212
213 normalize :: [Double] -> [Double]
214 normalize as = normalizeWith identity as
215
216 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
217 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
218 where
219 bs' = map extract bs
220
221 -- Zip functions to add
222 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
223 zipFst f xs = zip (f xs) xs
224
225 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
226 zipSnd f xs = zip xs (f xs)
227
228 -- | maximumWith
229 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
230 maximumWith f = L.maximumBy (compare `on` f)
231
232 -- | To get all combinations of a list with no
233 -- repetition and apply a function to the resulting list of pairs
234 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
235 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
236
237 ------------------------------------------------------------------------
238 -- Empty List Sugar Error Handling
239 -- TODO add Garg Monad Errors
240
241 listSafe1 :: Text -> ([a] -> Maybe a)
242 -> Text -> [a] -> a
243 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
244 where
245 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
246
247 head' :: Text -> [a] -> a
248 head' = listSafe1 "head" headMay
249
250 last' :: Text -> [a] -> a
251 last' = listSafe1 "last" lastMay
252
253 ------------------------------------------------------------------------
254
255 listSafeN :: Text -> ([a] -> Maybe [a])
256 -> Text -> [a] -> [a]
257 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
258 where
259 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
260
261 tail' :: Text -> [a] -> [a]
262 tail' = listSafeN "tail" tailMay
263
264 init' :: Text -> [a] -> [a]
265 init' = listSafeN "init" initMay
266
267 ------------------------------------------------------------------------
268 --- Some Statistics sugar functions
269 -- Exponential Average
270 eavg :: [Double] -> Double
271 eavg (x:xs) = a*x + (1-a)*(eavg xs)
272 where a = 0.70
273 eavg [] = 0
274
275 -- Simple Average
276 mean :: Fractional a => [a] -> a
277 mean xs = sum xs / fromIntegral (length xs)
278
279 sumMaybe :: Num a => [Maybe a] -> Maybe a
280 sumMaybe = fmap sum . M.sequence
281
282 variance :: Floating a => [a] -> a
283 variance xs = sum ys / (fromIntegral (length xs) - 1)
284 where
285 m = mean xs
286 ys = map (\x -> (x - m) ** 2) xs
287
288 deviation :: Floating a => [a] -> a
289 deviation = sqrt . variance
290
291 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
292 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
293
294 -----------------------------------------------------------------------
295 -----------------------------------------------------------------------
296 --- Map in Map = Map2
297 -- To avoid Map (a,a) b
298 type Map2 a b = Map a (Map a b)
299
300 lookup2 :: Ord a
301 => a
302 -> a
303 -> Map2 a b
304 -> Maybe b
305 lookup2 a b m = do
306 m' <- lookup a m
307 lookup b m'
308
309 -----------------------------------------------
310
311 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
312 foldM' _ z [] = return z
313 foldM' f z (x:xs) = do
314 z' <- f z x
315 z' `seq` foldM' f z' xs
316
317