]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
Merge branch 'ngrams-replace' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell...
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14 {-# LANGUAGE TemplateHaskell #-}
15
16 module Gargantext.Prelude
17 ( module Gargantext.Prelude
18 , module Protolude
19 , headMay, lastMay
20 , module GHC.Err.Located
21 , module Text.Show
22 , module Text.Read
23 , cs
24 , module Data.Maybe
25 , round
26 , sortWith
27 , module Prelude
28 , MonadBase(..)
29 , Typeable
30 )
31 where
32
33 import Control.Monad.Base (MonadBase(..))
34 import GHC.Exts (sortWith)
35 import GHC.Err.Located (undefined)
36 import GHC.Real (round)
37 import Data.Map (Map, lookup)
38 import Data.Maybe (isJust, fromJust, maybe)
39 import Data.Text (Text)
40 import Data.Typeable (Typeable)
41 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
42 , Fractional, Num, Maybe(Just,Nothing)
43 , Enum, Bounded, Float
44 , Floating, Char, IO
45 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
46 , head, flip
47 , Ord, Integral, Foldable, RealFrac, Monad, filter
48 , reverse, map, mapM, zip, drop, take, zipWith
49 , sum, fromIntegral, length, fmap, foldl, foldl'
50 , takeWhile, sqrt, identity
51 , abs, min, max, maximum, minimum, return, snd, truncate
52 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
53 , Eq, (==), (>=), (<=), (<>), (/=)
54 , (&&), (||), not, any, all
55 , concatMap
56 , fst, snd, toS
57 , elem, die, mod, div, const, either
58 , curry, uncurry, repeat
59 , otherwise, when
60 , IO()
61 , compare
62 , on
63 , panic
64 , seq
65 )
66
67 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
68 -- TODO import functions optimized in Utils.Count
69 -- import Protolude hiding (head, last, all, any, sum, product, length)
70 -- import Gargantext.Utils.Count
71 import qualified Data.List as L hiding (head, sum)
72 import qualified Control.Monad as M
73 import qualified Data.Map as M
74 import Data.Map.Strict (insertWith)
75 import qualified Data.Vector as V
76 import Safe (headMay, lastMay, initMay, tailMay)
77 import Text.Show (Show(), show)
78 import Text.Read (Read())
79 import Data.String.Conversions (cs)
80
81
82 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
83 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
84 -- printDebug _ _ = pure ()
85
86
87 -- | splitEvery n == chunkAlong n n
88 splitEvery :: Int -> [a] -> [[a]]
89 splitEvery _ [] = []
90 splitEvery n xs =
91 let (h,t) = L.splitAt n xs
92 in h : splitEvery n t
93
94 type Grain = Int
95 type Step = Int
96
97 -- | Function to split a range into chunks
98 -- if step == grain then linearity (splitEvery)
99 -- elif step < grain then overlapping
100 -- else dotted with holes
101 -- TODO FIX BUG if Steps*Grain /= length l
102 -- chunkAlong 10 10 [1..15] == [1..10]
103 -- BUG: what about the rest of (divMod 15 10)?
104 -- TODO: chunkAlongNoRest or chunkAlongWithRest
105 -- default behavior: NoRest
106
107 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
108 chunkAlong a b l = case a >= length l of
109 True -> [l]
110 False -> chunkAlong' a b l
111
112 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
113 chunkAlong' a b l = case a > 0 && b > 0 of
114 True -> chunkAlong'' a b l
115 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
116
117 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
118 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
119 where
120 only = map (take a)
121 while = takeWhile (\x -> length x >= a)
122 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
123
124 -- | Optimized version (Vector)
125 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
126 chunkAlongV a b l = only (while dropAlong)
127 where
128 only = V.map (V.take a)
129 while = V.takeWhile (\x -> V.length x >= a)
130 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
131
132 -- | TODO Inverse of chunk ? unchunkAlong ?
133 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
134 -- unchunkAlong = undefined
135
136
137 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
138 splitAlong :: [Int] -> [Char] -> [[Char]]
139 splitAlong _ [] = [] -- No list? done
140 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
141 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
142 -- take until our split spot, recurse with next split spot and list remainder
143
144 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
145 takeWhileM _ [] = return []
146 takeWhileM p (a:as) = do
147 v <- a
148 if p v
149 then do
150 vs <- takeWhileM p as
151 return (v:vs)
152 else return []
153
154 -- SUMS
155 -- To select the right algorithme according to the type:
156 -- https://github.com/mikeizbicki/ifcxt
157
158 sumSimple :: Num a => [a] -> a
159 sumSimple = L.foldl' (+) 0
160
161 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
162 sumKahan :: Num a => [a] -> a
163 sumKahan = snd . L.foldl' go (0,0)
164 where
165 go (c,t) i = ((t'-t)-y,t')
166 where
167 y = i-c
168 t' = t+y
169
170 -- | compute part of the dict
171 count2map :: (Ord k, Foldable t) => t k -> Map k Double
172 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
173
174 -- | insert in a dict
175 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
176 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
177
178 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
179 trunc n = truncate . (* 10^n)
180
181 trunc' :: Int -> Double -> Double
182 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
183
184 ------------------------------------------------------------------------
185 bool2num :: Num a => Bool -> a
186 bool2num True = 1
187 bool2num False = 0
188
189 bool2double :: Bool -> Double
190 bool2double = bool2num
191
192 bool2int :: Bool -> Int
193 bool2int = bool2num
194 ------------------------------------------------------------------------
195
196 -- Normalizing && scaling data
197 scale :: [Double] -> [Double]
198 scale = scaleMinMax
199
200 scaleMinMax :: [Double] -> [Double]
201 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
202 where
203 ma = maximum xs'
204 mi = minimum xs'
205 xs' = map abs xs
206
207 scaleNormalize :: [Double] -> [Double]
208 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
209 where
210 v = variance xs'
211 m = mean xs'
212 xs' = map abs xs
213
214 normalize :: [Double] -> [Double]
215 normalize as = normalizeWith identity as
216
217 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
218 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
219 where
220 bs' = map extract bs
221
222 -- Zip functions to add
223 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
224 zipFst f xs = zip (f xs) xs
225
226 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
227 zipSnd f xs = zip xs (f xs)
228
229 -- | maximumWith
230 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
231 maximumWith f = L.maximumBy (compare `on` f)
232
233 -- | To get all combinations of a list with no
234 -- repetition and apply a function to the resulting list of pairs
235 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
236 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
237
238 ------------------------------------------------------------------------
239 -- Empty List Sugar Error Handling
240 -- TODO add Garg Monad Errors
241
242 listSafe1 :: Text -> ([a] -> Maybe a)
243 -> Text -> [a] -> a
244 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
245 where
246 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
247
248 head' :: Text -> [a] -> a
249 head' = listSafe1 "head" headMay
250
251 last' :: Text -> [a] -> a
252 last' = listSafe1 "last" lastMay
253
254 ------------------------------------------------------------------------
255
256 listSafeN :: Text -> ([a] -> Maybe [a])
257 -> Text -> [a] -> [a]
258 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
259 where
260 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
261
262 tail' :: Text -> [a] -> [a]
263 tail' = listSafeN "tail" tailMay
264
265 init' :: Text -> [a] -> [a]
266 init' = listSafeN "init" initMay
267
268 ------------------------------------------------------------------------
269 --- Some Statistics sugar functions
270 -- Exponential Average
271 eavg :: [Double] -> Double
272 eavg (x:xs) = a*x + (1-a)*(eavg xs)
273 where a = 0.70
274 eavg [] = 0
275
276 -- Simple Average
277 mean :: Fractional a => [a] -> a
278 mean xs = sum xs / fromIntegral (length xs)
279
280 sumMaybe :: Num a => [Maybe a] -> Maybe a
281 sumMaybe = fmap sum . M.sequence
282
283 variance :: Floating a => [a] -> a
284 variance xs = sum ys / (fromIntegral (length xs) - 1)
285 where
286 m = mean xs
287 ys = map (\x -> (x - m) ** 2) xs
288
289 deviation :: Floating a => [a] -> a
290 deviation = sqrt . variance
291
292 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
293 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
294
295 -----------------------------------------------------------------------
296 -----------------------------------------------------------------------
297 --- Map in Map = Map2
298 -- To avoid Map (a,a) b
299 type Map2 a b = Map a (Map a b)
300
301 lookup2 :: Ord a
302 => a
303 -> a
304 -> Map2 a b
305 -> Maybe b
306 lookup2 a b m = do
307 m' <- lookup a m
308 lookup b m'
309
310 -----------------------------------------------
311
312 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
313 foldM' _ z [] = return z
314 foldM' f z (x:xs) = do
315 z' <- f z x
316 z' `seq` foldM' f z' xs
317
318