]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[OPTIM] after profiling, optimize serialisation
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE NoImplicitPrelude #-}
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE RankNTypes #-}
19
20 module Gargantext.Prelude
21 ( module Gargantext.Prelude
22 , module Protolude
23 , headMay, lastMay
24 , module GHC.Err.Located
25 , module Text.Show
26 , module Text.Read
27 , cs
28 , module Data.Maybe
29 , round
30 , sortWith
31 , module Prelude
32 , MonadBase(..)
33 )
34 where
35
36 import Control.Monad.Base (MonadBase(..))
37 import GHC.Exts (sortWith)
38 import GHC.Err.Located (undefined)
39 import GHC.Real (round)
40 import Data.Map (Map, lookup)
41 import Data.Maybe (isJust, fromJust, maybe)
42 import Data.Text (Text)
43 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
44 , Fractional, Num, Maybe(Just,Nothing)
45 , Enum, Bounded, Float
46 , Floating, Char, IO
47 , pure, (>>=), (=<<), (<*>), (<$>), (>>)
48 , head, flip
49 , Ord, Integral, Foldable, RealFrac, Monad, filter
50 , reverse, map, mapM, zip, drop, take, zipWith
51 , sum, fromIntegral, length, fmap, foldl, foldl'
52 , takeWhile, sqrt, identity
53 , abs, min, max, maximum, minimum, return, snd, truncate
54 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
55 , Eq, (==), (>=), (<=), (<>), (/=)
56 , (&&), (||), not, any, all
57 , concatMap
58 , fst, snd, toS
59 , elem, die, mod, div, const, either
60 , curry, uncurry, repeat
61 , otherwise, when
62 , IO()
63 , compare
64 , on
65 , panic
66 )
67
68 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
69 -- TODO import functions optimized in Utils.Count
70 -- import Protolude hiding (head, last, all, any, sum, product, length)
71 -- import Gargantext.Utils.Count
72 import qualified Data.List as L hiding (head, sum)
73 import qualified Control.Monad as M
74 import qualified Data.Map as M
75 import Data.Map.Strict (insertWith)
76 import qualified Data.Vector as V
77 import Safe (headMay, lastMay, initMay, tailMay)
78 import Text.Show (Show(), show)
79 import Text.Read (Read())
80 import Data.String.Conversions (cs)
81
82
83 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
84 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
85 -- printDebug _ _ = pure ()
86
87
88 -- | splitEvery n == chunkAlong n n
89 splitEvery :: Int -> [a] -> [[a]]
90 splitEvery _ [] = []
91 splitEvery n xs =
92 let (h,t) = L.splitAt n xs
93 in h : splitEvery n t
94
95 type Grain = Int
96 type Step = Int
97
98 -- | Function to split a range into chunks
99 -- if step == grain then linearity (splitEvery)
100 -- elif step < grain then overlapping
101 -- else dotted with holes
102 -- TODO FIX BUG if Steps*Grain /= length l
103 -- chunkAlong 10 10 [1..15] == [1..10]
104 -- BUG: what about the rest of (divMod 15 10)?
105 -- TODO: chunkAlongNoRest or chunkAlongWithRest
106 -- default behavior: NoRest
107
108 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
109 chunkAlong a b l = case a >= length l of
110 True -> [l]
111 False -> chunkAlong' a b l
112
113 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
114 chunkAlong' a b l = case a > 0 && b > 0 of
115 True -> chunkAlong'' a b l
116 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
117
118 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
119 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
120 where
121 only = map (take a)
122 while = takeWhile (\x -> length x >= a)
123 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
124
125 -- | Optimized version (Vector)
126 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
127 chunkAlongV a b l = only (while dropAlong)
128 where
129 only = V.map (V.take a)
130 while = V.takeWhile (\x -> V.length x >= a)
131 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
132
133 -- | TODO Inverse of chunk ? unchunkAlong ?
134 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
135 -- unchunkAlong = undefined
136
137
138 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
139 splitAlong :: [Int] -> [Char] -> [[Char]]
140 splitAlong _ [] = [] -- No list? done
141 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
142 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
143 -- take until our split spot, recurse with next split spot and list remainder
144
145 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
146 takeWhileM _ [] = return []
147 takeWhileM p (a:as) = do
148 v <- a
149 if p v
150 then do
151 vs <- takeWhileM p as
152 return (v:vs)
153 else return []
154
155 -- SUMS
156 -- To select the right algorithme according to the type:
157 -- https://github.com/mikeizbicki/ifcxt
158
159 sumSimple :: Num a => [a] -> a
160 sumSimple = L.foldl' (+) 0
161
162 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
163 sumKahan :: Num a => [a] -> a
164 sumKahan = snd . L.foldl' go (0,0)
165 where
166 go (c,t) i = ((t'-t)-y,t')
167 where
168 y = i-c
169 t' = t+y
170
171 -- | compute part of the dict
172 count2map :: (Ord k, Foldable t) => t k -> Map k Double
173 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
174
175 -- | insert in a dict
176 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
177 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
178
179 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
180 trunc n = truncate . (* 10^n)
181
182 trunc' :: Int -> Double -> Double
183 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
184
185 ------------------------------------------------------------------------
186 bool2num :: Num a => Bool -> a
187 bool2num True = 1
188 bool2num False = 0
189
190 bool2double :: Bool -> Double
191 bool2double = bool2num
192
193 bool2int :: Bool -> Int
194 bool2int = bool2num
195 ------------------------------------------------------------------------
196
197 -- Normalizing && scaling data
198 scale :: [Double] -> [Double]
199 scale = scaleMinMax
200
201 scaleMinMax :: [Double] -> [Double]
202 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
203 where
204 ma = maximum xs'
205 mi = minimum xs'
206 xs' = map abs xs
207
208 scaleNormalize :: [Double] -> [Double]
209 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
210 where
211 v = variance xs'
212 m = mean xs'
213 xs' = map abs xs
214
215 normalize :: [Double] -> [Double]
216 normalize as = normalizeWith identity as
217
218 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
219 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
220 where
221 bs' = map extract bs
222
223 -- Zip functions to add
224 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
225 zipFst f xs = zip (f xs) xs
226
227 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
228 zipSnd f xs = zip xs (f xs)
229
230 -- | maximumWith
231 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
232 maximumWith f = L.maximumBy (compare `on` f)
233
234 -- | To get all combinations of a list with no
235 -- repetition and apply a function to the resulting list of pairs
236 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
237 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
238
239 ------------------------------------------------------------------------
240 -- Empty List Sugar Error Handling
241 -- TODO add Garg Monad Errors
242
243 listSafe1 :: Text -> ([a] -> Maybe a)
244 -> Text -> [a] -> a
245 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
246 where
247 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
248
249 head' :: Text -> [a] -> a
250 head' = listSafe1 "head" headMay
251
252 last' :: Text -> [a] -> a
253 last' = listSafe1 "last" lastMay
254
255 ------------------------------------------------------------------------
256
257 listSafeN :: Text -> ([a] -> Maybe [a])
258 -> Text -> [a] -> [a]
259 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
260 where
261 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
262
263 tail' :: Text -> [a] -> [a]
264 tail' = listSafeN "tail" tailMay
265
266 init' :: Text -> [a] -> [a]
267 init' = listSafeN "init" initMay
268
269 ------------------------------------------------------------------------
270 --- Some Statistics sugar functions
271 -- Exponential Average
272 eavg :: [Double] -> Double
273 eavg (x:xs) = a*x + (1-a)*(eavg xs)
274 where a = 0.70
275 eavg [] = 0
276
277 -- Simple Average
278 mean :: Fractional a => [a] -> a
279 mean xs = sum xs / fromIntegral (length xs)
280
281 sumMaybe :: Num a => [Maybe a] -> Maybe a
282 sumMaybe = fmap sum . M.sequence
283
284 variance :: Floating a => [a] -> a
285 variance xs = sum ys / (fromIntegral (length xs) - 1)
286 where
287 m = mean xs
288 ys = map (\x -> (x - m) ** 2) xs
289
290 deviation :: Floating a => [a] -> a
291 deviation = sqrt . variance
292
293 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
294 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
295
296 ma :: [Double] -> [Double]
297 ma = movingAverage 3
298
299 -----------------------------------------------------------------------
300 -----------------------------------------------------------------------
301 --- Map in Map = Map2
302 -- To avoid Map (a,a) b
303 type Map2 a b = Map a (Map a b)
304
305 lookup2 :: Ord a
306 => a
307 -> a
308 -> Map2 a b
309 -> Maybe b
310 lookup2 a b m = do
311 m' <- lookup a m
312 lookup b m'
313
314