]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[REFACT]
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-orphans #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14
15 module Gargantext.Prelude
16 ( module Gargantext.Prelude
17 , module Protolude
18 , module GHC.Err.Located
19 , module Text.Show
20 , module Text.Read
21 , module Data.Maybe
22 , module Prelude
23 , MonadBase(..)
24 , Typeable
25 , cs
26 , headMay, lastMay, sortWith
27 , round
28 )
29 where
30
31 import Control.Monad.Base (MonadBase(..))
32 import Data.Set (Set)
33 import GHC.Exts (sortWith)
34 import GHC.Err.Located (undefined)
35 import GHC.Real (round)
36 import Data.Map (Map, lookup)
37 import Data.Maybe (isJust, fromJust, maybe)
38 import Data.Monoid (Monoid, mempty)
39 import Data.Semigroup (Semigroup, (<>))
40 import Data.Text (Text, pack)
41 import Data.Typeable (Typeable)
42 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
43 , Fractional, Num, Maybe(Just,Nothing)
44 , Enum, Bounded, Float
45 , Floating, Char, IO
46 , Functor(..)
47 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
48 , head, flip
49 , Ord, Integral, Foldable, RealFrac, Monad, filter
50 , reverse, map, mapM, zip, drop, take, zipWith
51 , sum, fromIntegral, length, fmap, foldl, foldl'
52 , takeWhile, sqrt, identity
53 , abs, min, max, maximum, minimum, return, snd, truncate
54 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
55 , Eq, (==), (>=), (<=), (<>), (/=), xor
56 , (&&), (||), not, any, all
57 , concatMap
58 , fst, snd, toS
59 , elem, die, mod, div, const, either
60 , curry, uncurry, repeat
61 , otherwise, when
62 , IO()
63 , compare
64 , on
65 , panic
66 , seq
67 )
68 import qualified Protolude as Protolude (writeFile)
69
70 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
71 -- TODO import functions optimized in Utils.Count
72 -- import Protolude hiding (head, last, all, any, sum, product, length)
73 -- import Gargantext.Utils.Count
74 import Data.Map.Strict (insertWith)
75 import Data.String.Conversions (cs)
76 import Safe (headMay, lastMay, initMay, tailMay)
77 import Text.Read (Read())
78 import Text.Show (Show(), show)
79 import qualified Control.Monad as M
80 import qualified Data.List as L hiding (head, sum)
81 import qualified Data.Map as M
82 import qualified Data.Set as Set
83 import qualified Data.Vector as V
84 import System.FilePath.Posix (takeDirectory)
85 import System.Directory (createDirectoryIfMissing)
86
87 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
88 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
89 -- printDebug _ _ = pure ()
90
91 saveAsFileDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
92 saveAsFileDebug fname x = do
93 let dir = takeDirectory fname
94 _ <- liftBase $ createDirectoryIfMissing True dir
95 liftBase . Protolude.writeFile fname $ pack $ show x
96
97
98 -- | splitEvery n == chunkAlong n n
99 splitEvery :: Int -> [a] -> [[a]]
100 splitEvery _ [] = []
101 splitEvery n xs =
102 let (h,t) = L.splitAt n xs
103 in h : splitEvery n t
104
105 type Grain = Int
106 type Step = Int
107
108 -- | Function to split a range into chunks
109 -- if step == grain then linearity (splitEvery)
110 -- elif step < grain then overlapping
111 -- else dotted with holes
112 -- TODO FIX BUG if Steps*Grain /= length l
113 -- chunkAlong 10 10 [1..15] == [1..10]
114 -- BUG: what about the rest of (divMod 15 10)?
115 -- TODO: chunkAlongNoRest or chunkAlongWithRest
116 -- default behavior: NoRest
117
118 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
119 chunkAlong a b l = case a >= length l of
120 True -> [l]
121 False -> chunkAlong' a b l
122
123 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
124 chunkAlong' a b l = case a > 0 && b > 0 of
125 True -> chunkAlong'' a b l
126 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
127
128 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
129 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
130 where
131 only = map (take a)
132 while = takeWhile (\x -> length x >= a)
133 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
134
135 -- | Optimized version (Vector)
136 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
137 chunkAlongV a b l = only (while dropAlong)
138 where
139 only = V.map (V.take a)
140 while = V.takeWhile (\x -> V.length x >= a)
141 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
142
143 -- | TODO Inverse of chunk ? unchunkAlong ?
144 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
145 -- unchunkAlong = undefined
146
147
148 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
149 splitAlong :: [Int] -> [Char] -> [[Char]]
150 splitAlong _ [] = [] -- No list? done
151 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
152 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
153 -- take until our split spot, recurse with next split spot and list remainder
154
155 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
156 takeWhileM _ [] = return []
157 takeWhileM p (a:as) = do
158 v <- a
159 if p v
160 then do
161 vs <- takeWhileM p as
162 return (v:vs)
163 else return []
164
165 -- SUMS
166 -- To select the right algorithme according to the type:
167 -- https://github.com/mikeizbicki/ifcxt
168
169 sumSimple :: Num a => [a] -> a
170 sumSimple = L.foldl' (+) 0
171
172 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
173 sumKahan :: Num a => [a] -> a
174 sumKahan = snd . L.foldl' go (0,0)
175 where
176 go (c,t) i = ((t'-t)-y,t')
177 where
178 y = i-c
179 t' = t+y
180
181 -- | compute part of the dict
182 count2map :: (Ord k, Foldable t) => t k -> Map k Double
183 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
184
185 -- | insert in a dict
186 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
187 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
188
189 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
190 trunc n = truncate . (* 10^n)
191
192 trunc' :: Int -> Double -> Double
193 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
194
195 ------------------------------------------------------------------------
196 bool2num :: Num a => Bool -> a
197 bool2num True = 1
198 bool2num False = 0
199
200 bool2double :: Bool -> Double
201 bool2double = bool2num
202
203 bool2int :: Bool -> Int
204 bool2int = bool2num
205 ------------------------------------------------------------------------
206
207 -- Normalizing && scaling data
208 scale :: [Double] -> [Double]
209 scale = scaleMinMax
210
211 scaleMinMax :: [Double] -> [Double]
212 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
213 where
214 ma = maximum xs'
215 mi = minimum xs'
216 xs' = map abs xs
217
218 scaleNormalize :: [Double] -> [Double]
219 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
220 where
221 v = variance xs'
222 m = mean xs'
223 xs' = map abs xs
224
225 normalize :: [Double] -> [Double]
226 normalize as = normalizeWith identity as
227
228 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
229 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
230 where
231 bs' = map extract bs
232
233 -- Zip functions to add
234 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
235 zipFst f xs = zip (f xs) xs
236
237 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
238 zipSnd f xs = zip xs (f xs)
239
240 -- | maximumWith
241 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
242 maximumWith f = L.maximumBy (compare `on` f)
243
244 -- | To get all combinations of a list with no
245 -- repetition and apply a function to the resulting list of pairs
246 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
247 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
248
249 ------------------------------------------------------------------------
250 -- Empty List Sugar Error Handling
251 -- TODO add Garg Monad Errors
252
253 listSafe1 :: Text -> ([a] -> Maybe a)
254 -> Text -> [a] -> a
255 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
256 where
257 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
258
259 head' :: Text -> [a] -> a
260 head' = listSafe1 "head" headMay
261
262 last' :: Text -> [a] -> a
263 last' = listSafe1 "last" lastMay
264
265 ------------------------------------------------------------------------
266
267 listSafeN :: Text -> ([a] -> Maybe [a])
268 -> Text -> [a] -> [a]
269 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
270 where
271 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
272
273 tail' :: Text -> [a] -> [a]
274 tail' = listSafeN "tail" tailMay
275
276 init' :: Text -> [a] -> [a]
277 init' = listSafeN "init" initMay
278
279 ------------------------------------------------------------------------
280 --- Some Statistics sugar functions
281 -- Exponential Average
282 eavg :: [Double] -> Double
283 eavg (x:xs) = a*x + (1-a)*(eavg xs)
284 where a = 0.70
285 eavg [] = 0
286
287 -- Simple Average
288 mean :: Fractional a => [a] -> a
289 mean xs = sum xs / fromIntegral (length xs)
290
291 sumMaybe :: Num a => [Maybe a] -> Maybe a
292 sumMaybe = fmap sum . M.sequence
293
294 variance :: Floating a => [a] -> a
295 variance xs = sum ys / (fromIntegral (length xs) - 1)
296 where
297 m = mean xs
298 ys = map (\x -> (x - m) ** 2) xs
299
300 deviation :: Floating a => [a] -> a
301 deviation = sqrt . variance
302
303 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
304 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
305
306 -----------------------------------------------------------------------
307 -----------------------------------------------------------------------
308 --- Map in Map = Map2
309 -- To avoid Map (a,a) b
310 type Map2 a b = Map a (Map a b)
311
312 lookup2 :: Ord a
313 => a
314 -> a
315 -> Map2 a b
316 -> Maybe b
317 lookup2 a b m = do
318 m' <- lookup a m
319 lookup b m'
320
321 -----------------------------------------------------------------------
322 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
323 foldM' _ z [] = return z
324 foldM' f z (x:xs) = do
325 z' <- f z x
326 z' `seq` foldM' f z' xs
327
328 -----------------------------------------------------------------------
329 -- | Instance for basic numerals
330 -- See the difference between Double and (Int Or Integer)
331 instance Monoid Double where
332 mempty = 1
333
334 instance Semigroup Double where
335 (<>) a b = a * b
336
337 -----------
338 instance Monoid Int where
339 mempty = 0
340
341 instance Semigroup Int where
342 (<>) a b = a + b
343 ----
344 instance Monoid Integer where
345 mempty = 0
346
347 instance Semigroup Integer where
348 (<>) a b = a + b
349
350 ------------------------------------------------------------------------
351
352 hasDuplicates :: Ord a => [a] -> Bool
353 hasDuplicates = hasDuplicatesWith Set.empty
354
355 hasDuplicatesWith :: Ord a => Set a -> [a] -> Bool
356 hasDuplicatesWith _seen [] =
357 False -- base case: empty lists never contain duplicates
358 hasDuplicatesWith seen (x:xs) =
359 -- If we have seen the current item before, we can short-circuit; otherwise,
360 -- we'll add it the the set of previously seen items and process the rest of the
361 -- list against that.
362 x `Set.member` seen || hasDuplicatesWith (Set.insert x seen) xs
363