]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
Merge branch 'dev' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
15
16 module Gargantext.Prelude
17 ( module Gargantext.Prelude
18 , module Protolude
19 , module GHC.Err.Located
20 , module Text.Show
21 , module Text.Read
22 , module Data.Maybe
23 , module Prelude
24 , MonadBase(..)
25 , Typeable
26 , cs
27 , headMay, lastMay, sortWith
28 , round
29 )
30 where
31
32 import Control.Monad.Base (MonadBase(..))
33 import Data.Set (Set)
34 import GHC.Exts (sortWith)
35 import GHC.Err.Located (undefined)
36 import GHC.Real (round)
37 import Data.Map (Map, lookup)
38 import Data.Maybe (isJust, fromJust, maybe)
39 import Data.Monoid (Monoid, mempty)
40 import Data.Semigroup (Semigroup, (<>))
41 import Data.Text (Text)
42 import Data.Typeable (Typeable)
43 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
44 , Fractional, Num, Maybe(Just,Nothing)
45 , Enum, Bounded, Float
46 , Floating, Char, IO
47 , Functor(..)
48 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
49 , head, flip
50 , Ord, Integral, Foldable, RealFrac, Monad, filter
51 , reverse, map, mapM, zip, drop, take, zipWith
52 , sum, fromIntegral, length, fmap, foldl, foldl'
53 , takeWhile, sqrt, identity
54 , abs, min, max, maximum, minimum, return, snd, truncate
55 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
56 , Eq, (==), (>=), (<=), (<>), (/=), xor
57 , (&&), (||), not, any, all
58 , concatMap
59 , fst, snd, toS
60 , elem, die, mod, div, const, either
61 , curry, uncurry, repeat
62 , otherwise, when
63 , IO()
64 , compare
65 , on
66 , panic
67 , seq
68 )
69
70 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
71 -- TODO import functions optimized in Utils.Count
72 -- import Protolude hiding (head, last, all, any, sum, product, length)
73 -- import Gargantext.Utils.Count
74 import Data.Map.Strict (insertWith)
75 import Data.String.Conversions (cs)
76 import Safe (headMay, lastMay, initMay, tailMay)
77 import Text.Read (Read())
78 import Text.Show (Show(), show)
79 import qualified Control.Monad as M
80 import qualified Data.List as L hiding (head, sum)
81 import qualified Data.Map as M
82 import qualified Data.Set as Set
83 import qualified Data.Vector as V
84
85
86 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
87 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
88 -- printDebug _ _ = pure ()
89
90
91 -- | splitEvery n == chunkAlong n n
92 splitEvery :: Int -> [a] -> [[a]]
93 splitEvery _ [] = []
94 splitEvery n xs =
95 let (h,t) = L.splitAt n xs
96 in h : splitEvery n t
97
98 type Grain = Int
99 type Step = Int
100
101 -- | Function to split a range into chunks
102 -- if step == grain then linearity (splitEvery)
103 -- elif step < grain then overlapping
104 -- else dotted with holes
105 -- TODO FIX BUG if Steps*Grain /= length l
106 -- chunkAlong 10 10 [1..15] == [1..10]
107 -- BUG: what about the rest of (divMod 15 10)?
108 -- TODO: chunkAlongNoRest or chunkAlongWithRest
109 -- default behavior: NoRest
110
111 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
112 chunkAlong a b l = case a >= length l of
113 True -> [l]
114 False -> chunkAlong' a b l
115
116 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
117 chunkAlong' a b l = case a > 0 && b > 0 of
118 True -> chunkAlong'' a b l
119 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
120
121 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
122 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
123 where
124 only = map (take a)
125 while = takeWhile (\x -> length x >= a)
126 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
127
128 -- | Optimized version (Vector)
129 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
130 chunkAlongV a b l = only (while dropAlong)
131 where
132 only = V.map (V.take a)
133 while = V.takeWhile (\x -> V.length x >= a)
134 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
135
136 -- | TODO Inverse of chunk ? unchunkAlong ?
137 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
138 -- unchunkAlong = undefined
139
140
141 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
142 splitAlong :: [Int] -> [Char] -> [[Char]]
143 splitAlong _ [] = [] -- No list? done
144 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
145 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
146 -- take until our split spot, recurse with next split spot and list remainder
147
148 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
149 takeWhileM _ [] = return []
150 takeWhileM p (a:as) = do
151 v <- a
152 if p v
153 then do
154 vs <- takeWhileM p as
155 return (v:vs)
156 else return []
157
158 -- SUMS
159 -- To select the right algorithme according to the type:
160 -- https://github.com/mikeizbicki/ifcxt
161
162 sumSimple :: Num a => [a] -> a
163 sumSimple = L.foldl' (+) 0
164
165 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
166 sumKahan :: Num a => [a] -> a
167 sumKahan = snd . L.foldl' go (0,0)
168 where
169 go (c,t) i = ((t'-t)-y,t')
170 where
171 y = i-c
172 t' = t+y
173
174 -- | compute part of the dict
175 count2map :: (Ord k, Foldable t) => t k -> Map k Double
176 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
177
178 -- | insert in a dict
179 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
180 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
181
182 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
183 trunc n = truncate . (* 10^n)
184
185 trunc' :: Int -> Double -> Double
186 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
187
188 ------------------------------------------------------------------------
189 bool2num :: Num a => Bool -> a
190 bool2num True = 1
191 bool2num False = 0
192
193 bool2double :: Bool -> Double
194 bool2double = bool2num
195
196 bool2int :: Bool -> Int
197 bool2int = bool2num
198 ------------------------------------------------------------------------
199
200 -- Normalizing && scaling data
201 scale :: [Double] -> [Double]
202 scale = scaleMinMax
203
204 scaleMinMax :: [Double] -> [Double]
205 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
206 where
207 ma = maximum xs'
208 mi = minimum xs'
209 xs' = map abs xs
210
211 scaleNormalize :: [Double] -> [Double]
212 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
213 where
214 v = variance xs'
215 m = mean xs'
216 xs' = map abs xs
217
218 normalize :: [Double] -> [Double]
219 normalize as = normalizeWith identity as
220
221 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
222 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
223 where
224 bs' = map extract bs
225
226 -- Zip functions to add
227 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
228 zipFst f xs = zip (f xs) xs
229
230 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
231 zipSnd f xs = zip xs (f xs)
232
233 -- | maximumWith
234 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
235 maximumWith f = L.maximumBy (compare `on` f)
236
237 -- | To get all combinations of a list with no
238 -- repetition and apply a function to the resulting list of pairs
239 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
240 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
241
242 ------------------------------------------------------------------------
243 -- Empty List Sugar Error Handling
244 -- TODO add Garg Monad Errors
245
246 listSafe1 :: Text -> ([a] -> Maybe a)
247 -> Text -> [a] -> a
248 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
249 where
250 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
251
252 head' :: Text -> [a] -> a
253 head' = listSafe1 "head" headMay
254
255 last' :: Text -> [a] -> a
256 last' = listSafe1 "last" lastMay
257
258 ------------------------------------------------------------------------
259
260 listSafeN :: Text -> ([a] -> Maybe [a])
261 -> Text -> [a] -> [a]
262 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
263 where
264 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
265
266 tail' :: Text -> [a] -> [a]
267 tail' = listSafeN "tail" tailMay
268
269 init' :: Text -> [a] -> [a]
270 init' = listSafeN "init" initMay
271
272 ------------------------------------------------------------------------
273 --- Some Statistics sugar functions
274 -- Exponential Average
275 eavg :: [Double] -> Double
276 eavg (x:xs) = a*x + (1-a)*(eavg xs)
277 where a = 0.70
278 eavg [] = 0
279
280 -- Simple Average
281 mean :: Fractional a => [a] -> a
282 mean xs = sum xs / fromIntegral (length xs)
283
284 sumMaybe :: Num a => [Maybe a] -> Maybe a
285 sumMaybe = fmap sum . M.sequence
286
287 variance :: Floating a => [a] -> a
288 variance xs = sum ys / (fromIntegral (length xs) - 1)
289 where
290 m = mean xs
291 ys = map (\x -> (x - m) ** 2) xs
292
293 deviation :: Floating a => [a] -> a
294 deviation = sqrt . variance
295
296 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
297 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
298
299 -----------------------------------------------------------------------
300 -----------------------------------------------------------------------
301 --- Map in Map = Map2
302 -- To avoid Map (a,a) b
303 type Map2 a b = Map a (Map a b)
304
305 lookup2 :: Ord a
306 => a
307 -> a
308 -> Map2 a b
309 -> Maybe b
310 lookup2 a b m = do
311 m' <- lookup a m
312 lookup b m'
313
314 -----------------------------------------------------------------------
315 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
316 foldM' _ z [] = return z
317 foldM' f z (x:xs) = do
318 z' <- f z x
319 z' `seq` foldM' f z' xs
320
321 -----------------------------------------------------------------------
322 -- | Instance for basic numerals
323 -- See the difference between Double and (Int Or Integer)
324 instance Monoid Double where
325 mempty = 1
326
327 instance Semigroup Double where
328 (<>) a b = a * b
329
330 -----------
331 instance Monoid Int where
332 mempty = 0
333
334 instance Semigroup Int where
335 (<>) a b = a + b
336 ----
337 instance Monoid Integer where
338 mempty = 0
339
340 instance Semigroup Integer where
341 (<>) a b = a + b
342
343 ------------------------------------------------------------------------
344
345 hasDuplicates :: Ord a => [a] -> Bool
346 hasDuplicates = hasDuplicatesWith Set.empty
347
348 hasDuplicatesWith :: Ord a => Set a -> [a] -> Bool
349 hasDuplicatesWith _seen [] =
350 False -- base case: empty lists never contain duplicates
351 hasDuplicatesWith seen (x:xs) =
352 -- If we have seen the current item before, we can short-circuit; otherwise,
353 -- we'll add it the the set of previously seen items and process the rest of the
354 -- list against that.
355 x `Set.member` seen || hasDuplicatesWith (Set.insert x seen) xs
356
357
358