]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[graph] add debug info for graph, no partition error
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
15
16 module Gargantext.Prelude
17 ( module Gargantext.Prelude
18 , module Protolude
19 , module GHC.Err.Located
20 , module Text.Show
21 , module Text.Read
22 , module Data.Maybe
23 , module Prelude
24 , MonadBase(..)
25 , Typeable
26 , cs
27 , headMay, lastMay, sortWith
28 , round
29 )
30 where
31
32 import Control.Monad.Base (MonadBase(..))
33 import Data.Set (Set)
34 import GHC.Exts (sortWith)
35 import GHC.Err.Located (undefined)
36 import GHC.Real (round)
37 import Data.Map (Map, lookup)
38 import Data.Maybe (isJust, fromJust, maybe)
39 import Data.Monoid (Monoid, mempty)
40 import Data.Semigroup (Semigroup, (<>))
41 import Data.Text (Text, pack)
42 import Data.Typeable (Typeable)
43 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
44 , Fractional, Num, Maybe(Just,Nothing)
45 , Enum, Bounded, Float
46 , Floating, Char, IO
47 , Functor(..)
48 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
49 , head, flip
50 , Ord, Integral, Foldable, RealFrac, Monad, filter
51 , reverse, map, mapM, zip, drop, take, zipWith
52 , sum, fromIntegral, length, fmap, foldl, foldl'
53 , takeWhile, sqrt, identity
54 , abs, min, max, maximum, minimum, return, snd, truncate
55 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
56 , Eq, (==), (>=), (<=), (<>), (/=), xor
57 , (&&), (||), not, any, all
58 , concatMap
59 , fst, snd, toS
60 , elem, die, mod, div, const, either
61 , curry, uncurry, repeat
62 , otherwise, when
63 , IO()
64 , compare
65 , on
66 , panic
67 , seq
68 )
69 import qualified Protolude as Protolude (writeFile)
70
71 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
72 -- TODO import functions optimized in Utils.Count
73 -- import Protolude hiding (head, last, all, any, sum, product, length)
74 -- import Gargantext.Utils.Count
75 import Data.Map.Strict (insertWith)
76 import Data.String.Conversions (cs)
77 import Safe (headMay, lastMay, initMay, tailMay)
78 import Text.Read (Read())
79 import Text.Show (Show(), show)
80 import qualified Control.Monad as M
81 import qualified Data.List as L hiding (head, sum)
82 import qualified Data.Map as M
83 import qualified Data.Set as Set
84 import qualified Data.Vector as V
85
86
87 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
88 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
89 -- printDebug _ _ = pure ()
90
91 saveAsFileDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
92 saveAsFileDebug fname x = liftBase . Protolude.writeFile fname $ pack $ show x
93
94
95 -- | splitEvery n == chunkAlong n n
96 splitEvery :: Int -> [a] -> [[a]]
97 splitEvery _ [] = []
98 splitEvery n xs =
99 let (h,t) = L.splitAt n xs
100 in h : splitEvery n t
101
102 type Grain = Int
103 type Step = Int
104
105 -- | Function to split a range into chunks
106 -- if step == grain then linearity (splitEvery)
107 -- elif step < grain then overlapping
108 -- else dotted with holes
109 -- TODO FIX BUG if Steps*Grain /= length l
110 -- chunkAlong 10 10 [1..15] == [1..10]
111 -- BUG: what about the rest of (divMod 15 10)?
112 -- TODO: chunkAlongNoRest or chunkAlongWithRest
113 -- default behavior: NoRest
114
115 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
116 chunkAlong a b l = case a >= length l of
117 True -> [l]
118 False -> chunkAlong' a b l
119
120 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
121 chunkAlong' a b l = case a > 0 && b > 0 of
122 True -> chunkAlong'' a b l
123 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
124
125 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
126 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
127 where
128 only = map (take a)
129 while = takeWhile (\x -> length x >= a)
130 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
131
132 -- | Optimized version (Vector)
133 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
134 chunkAlongV a b l = only (while dropAlong)
135 where
136 only = V.map (V.take a)
137 while = V.takeWhile (\x -> V.length x >= a)
138 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
139
140 -- | TODO Inverse of chunk ? unchunkAlong ?
141 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
142 -- unchunkAlong = undefined
143
144
145 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
146 splitAlong :: [Int] -> [Char] -> [[Char]]
147 splitAlong _ [] = [] -- No list? done
148 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
149 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
150 -- take until our split spot, recurse with next split spot and list remainder
151
152 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
153 takeWhileM _ [] = return []
154 takeWhileM p (a:as) = do
155 v <- a
156 if p v
157 then do
158 vs <- takeWhileM p as
159 return (v:vs)
160 else return []
161
162 -- SUMS
163 -- To select the right algorithme according to the type:
164 -- https://github.com/mikeizbicki/ifcxt
165
166 sumSimple :: Num a => [a] -> a
167 sumSimple = L.foldl' (+) 0
168
169 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
170 sumKahan :: Num a => [a] -> a
171 sumKahan = snd . L.foldl' go (0,0)
172 where
173 go (c,t) i = ((t'-t)-y,t')
174 where
175 y = i-c
176 t' = t+y
177
178 -- | compute part of the dict
179 count2map :: (Ord k, Foldable t) => t k -> Map k Double
180 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
181
182 -- | insert in a dict
183 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
184 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
185
186 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
187 trunc n = truncate . (* 10^n)
188
189 trunc' :: Int -> Double -> Double
190 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
191
192 ------------------------------------------------------------------------
193 bool2num :: Num a => Bool -> a
194 bool2num True = 1
195 bool2num False = 0
196
197 bool2double :: Bool -> Double
198 bool2double = bool2num
199
200 bool2int :: Bool -> Int
201 bool2int = bool2num
202 ------------------------------------------------------------------------
203
204 -- Normalizing && scaling data
205 scale :: [Double] -> [Double]
206 scale = scaleMinMax
207
208 scaleMinMax :: [Double] -> [Double]
209 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
210 where
211 ma = maximum xs'
212 mi = minimum xs'
213 xs' = map abs xs
214
215 scaleNormalize :: [Double] -> [Double]
216 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
217 where
218 v = variance xs'
219 m = mean xs'
220 xs' = map abs xs
221
222 normalize :: [Double] -> [Double]
223 normalize as = normalizeWith identity as
224
225 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
226 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
227 where
228 bs' = map extract bs
229
230 -- Zip functions to add
231 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
232 zipFst f xs = zip (f xs) xs
233
234 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
235 zipSnd f xs = zip xs (f xs)
236
237 -- | maximumWith
238 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
239 maximumWith f = L.maximumBy (compare `on` f)
240
241 -- | To get all combinations of a list with no
242 -- repetition and apply a function to the resulting list of pairs
243 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
244 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
245
246 ------------------------------------------------------------------------
247 -- Empty List Sugar Error Handling
248 -- TODO add Garg Monad Errors
249
250 listSafe1 :: Text -> ([a] -> Maybe a)
251 -> Text -> [a] -> a
252 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
253 where
254 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
255
256 head' :: Text -> [a] -> a
257 head' = listSafe1 "head" headMay
258
259 last' :: Text -> [a] -> a
260 last' = listSafe1 "last" lastMay
261
262 ------------------------------------------------------------------------
263
264 listSafeN :: Text -> ([a] -> Maybe [a])
265 -> Text -> [a] -> [a]
266 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
267 where
268 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
269
270 tail' :: Text -> [a] -> [a]
271 tail' = listSafeN "tail" tailMay
272
273 init' :: Text -> [a] -> [a]
274 init' = listSafeN "init" initMay
275
276 ------------------------------------------------------------------------
277 --- Some Statistics sugar functions
278 -- Exponential Average
279 eavg :: [Double] -> Double
280 eavg (x:xs) = a*x + (1-a)*(eavg xs)
281 where a = 0.70
282 eavg [] = 0
283
284 -- Simple Average
285 mean :: Fractional a => [a] -> a
286 mean xs = sum xs / fromIntegral (length xs)
287
288 sumMaybe :: Num a => [Maybe a] -> Maybe a
289 sumMaybe = fmap sum . M.sequence
290
291 variance :: Floating a => [a] -> a
292 variance xs = sum ys / (fromIntegral (length xs) - 1)
293 where
294 m = mean xs
295 ys = map (\x -> (x - m) ** 2) xs
296
297 deviation :: Floating a => [a] -> a
298 deviation = sqrt . variance
299
300 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
301 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
302
303 -----------------------------------------------------------------------
304 -----------------------------------------------------------------------
305 --- Map in Map = Map2
306 -- To avoid Map (a,a) b
307 type Map2 a b = Map a (Map a b)
308
309 lookup2 :: Ord a
310 => a
311 -> a
312 -> Map2 a b
313 -> Maybe b
314 lookup2 a b m = do
315 m' <- lookup a m
316 lookup b m'
317
318 -----------------------------------------------------------------------
319 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
320 foldM' _ z [] = return z
321 foldM' f z (x:xs) = do
322 z' <- f z x
323 z' `seq` foldM' f z' xs
324
325 -----------------------------------------------------------------------
326 -- | Instance for basic numerals
327 -- See the difference between Double and (Int Or Integer)
328 instance Monoid Double where
329 mempty = 1
330
331 instance Semigroup Double where
332 (<>) a b = a * b
333
334 -----------
335 instance Monoid Int where
336 mempty = 0
337
338 instance Semigroup Int where
339 (<>) a b = a + b
340 ----
341 instance Monoid Integer where
342 mempty = 0
343
344 instance Semigroup Integer where
345 (<>) a b = a + b
346
347 ------------------------------------------------------------------------
348
349 hasDuplicates :: Ord a => [a] -> Bool
350 hasDuplicates = hasDuplicatesWith Set.empty
351
352 hasDuplicatesWith :: Ord a => Set a -> [a] -> Bool
353 hasDuplicatesWith _seen [] =
354 False -- base case: empty lists never contain duplicates
355 hasDuplicatesWith seen (x:xs) =
356 -- If we have seen the current item before, we can short-circuit; otherwise,
357 -- we'll add it the the set of previously seen items and process the rest of the
358 -- list against that.
359 x `Set.member` seen || hasDuplicatesWith (Set.insert x seen) xs
360
361
362