]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[REFACT] toGroupedTree done for Ngrams Terms
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14
15 module Gargantext.Prelude
16 ( module Gargantext.Prelude
17 , module Protolude
18 , headMay, lastMay
19 , module GHC.Err.Located
20 , module Text.Show
21 , module Text.Read
22 , cs
23 , module Data.Maybe
24 , round
25 , sortWith
26 , module Prelude
27 , MonadBase(..)
28 , Typeable
29 )
30 where
31
32 import Control.Monad.Base (MonadBase(..))
33 import GHC.Exts (sortWith)
34 import GHC.Err.Located (undefined)
35 import GHC.Real (round)
36 import Data.Map (Map, lookup)
37 import Data.Maybe (isJust, fromJust, maybe)
38 import Data.Monoid (Monoid, mempty)
39 import Data.Semigroup (Semigroup, (<>))
40 import Data.Text (Text)
41 import Data.Typeable (Typeable)
42 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
43 , Fractional, Num, Maybe(Just,Nothing)
44 , Enum, Bounded, Float
45 , Floating, Char, IO
46 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
47 , head, flip
48 , Ord, Integral, Foldable, RealFrac, Monad, filter
49 , reverse, map, mapM, zip, drop, take, zipWith
50 , sum, fromIntegral, length, fmap, foldl, foldl'
51 , takeWhile, sqrt, identity
52 , abs, min, max, maximum, minimum, return, snd, truncate
53 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
54 , Eq, (==), (>=), (<=), (<>), (/=)
55 , (&&), (||), not, any, all
56 , concatMap
57 , fst, snd, toS
58 , elem, die, mod, div, const, either
59 , curry, uncurry, repeat
60 , otherwise, when
61 , IO()
62 , compare
63 , on
64 , panic
65 , seq
66 )
67
68 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
69 -- TODO import functions optimized in Utils.Count
70 -- import Protolude hiding (head, last, all, any, sum, product, length)
71 -- import Gargantext.Utils.Count
72 import qualified Data.List as L hiding (head, sum)
73 import qualified Control.Monad as M
74 import qualified Data.Map as M
75 import Data.Map.Strict (insertWith)
76 import qualified Data.Vector as V
77 import Safe (headMay, lastMay, initMay, tailMay)
78 import Text.Show (Show(), show)
79 import Text.Read (Read())
80 import Data.String.Conversions (cs)
81
82
83 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
84 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
85 -- printDebug _ _ = pure ()
86
87
88 -- | splitEvery n == chunkAlong n n
89 splitEvery :: Int -> [a] -> [[a]]
90 splitEvery _ [] = []
91 splitEvery n xs =
92 let (h,t) = L.splitAt n xs
93 in h : splitEvery n t
94
95 type Grain = Int
96 type Step = Int
97
98 -- | Function to split a range into chunks
99 -- if step == grain then linearity (splitEvery)
100 -- elif step < grain then overlapping
101 -- else dotted with holes
102 -- TODO FIX BUG if Steps*Grain /= length l
103 -- chunkAlong 10 10 [1..15] == [1..10]
104 -- BUG: what about the rest of (divMod 15 10)?
105 -- TODO: chunkAlongNoRest or chunkAlongWithRest
106 -- default behavior: NoRest
107
108 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
109 chunkAlong a b l = case a >= length l of
110 True -> [l]
111 False -> chunkAlong' a b l
112
113 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
114 chunkAlong' a b l = case a > 0 && b > 0 of
115 True -> chunkAlong'' a b l
116 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
117
118 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
119 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
120 where
121 only = map (take a)
122 while = takeWhile (\x -> length x >= a)
123 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
124
125 -- | Optimized version (Vector)
126 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
127 chunkAlongV a b l = only (while dropAlong)
128 where
129 only = V.map (V.take a)
130 while = V.takeWhile (\x -> V.length x >= a)
131 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
132
133 -- | TODO Inverse of chunk ? unchunkAlong ?
134 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
135 -- unchunkAlong = undefined
136
137
138 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
139 splitAlong :: [Int] -> [Char] -> [[Char]]
140 splitAlong _ [] = [] -- No list? done
141 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
142 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
143 -- take until our split spot, recurse with next split spot and list remainder
144
145 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
146 takeWhileM _ [] = return []
147 takeWhileM p (a:as) = do
148 v <- a
149 if p v
150 then do
151 vs <- takeWhileM p as
152 return (v:vs)
153 else return []
154
155 -- SUMS
156 -- To select the right algorithme according to the type:
157 -- https://github.com/mikeizbicki/ifcxt
158
159 sumSimple :: Num a => [a] -> a
160 sumSimple = L.foldl' (+) 0
161
162 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
163 sumKahan :: Num a => [a] -> a
164 sumKahan = snd . L.foldl' go (0,0)
165 where
166 go (c,t) i = ((t'-t)-y,t')
167 where
168 y = i-c
169 t' = t+y
170
171 -- | compute part of the dict
172 count2map :: (Ord k, Foldable t) => t k -> Map k Double
173 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
174
175 -- | insert in a dict
176 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
177 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
178
179 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
180 trunc n = truncate . (* 10^n)
181
182 trunc' :: Int -> Double -> Double
183 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
184
185 ------------------------------------------------------------------------
186 bool2num :: Num a => Bool -> a
187 bool2num True = 1
188 bool2num False = 0
189
190 bool2double :: Bool -> Double
191 bool2double = bool2num
192
193 bool2int :: Bool -> Int
194 bool2int = bool2num
195 ------------------------------------------------------------------------
196
197 -- Normalizing && scaling data
198 scale :: [Double] -> [Double]
199 scale = scaleMinMax
200
201 scaleMinMax :: [Double] -> [Double]
202 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
203 where
204 ma = maximum xs'
205 mi = minimum xs'
206 xs' = map abs xs
207
208 scaleNormalize :: [Double] -> [Double]
209 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
210 where
211 v = variance xs'
212 m = mean xs'
213 xs' = map abs xs
214
215 normalize :: [Double] -> [Double]
216 normalize as = normalizeWith identity as
217
218 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
219 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
220 where
221 bs' = map extract bs
222
223 -- Zip functions to add
224 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
225 zipFst f xs = zip (f xs) xs
226
227 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
228 zipSnd f xs = zip xs (f xs)
229
230 -- | maximumWith
231 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
232 maximumWith f = L.maximumBy (compare `on` f)
233
234 -- | To get all combinations of a list with no
235 -- repetition and apply a function to the resulting list of pairs
236 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
237 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
238
239 ------------------------------------------------------------------------
240 -- Empty List Sugar Error Handling
241 -- TODO add Garg Monad Errors
242
243 listSafe1 :: Text -> ([a] -> Maybe a)
244 -> Text -> [a] -> a
245 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
246 where
247 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
248
249 head' :: Text -> [a] -> a
250 head' = listSafe1 "head" headMay
251
252 last' :: Text -> [a] -> a
253 last' = listSafe1 "last" lastMay
254
255 ------------------------------------------------------------------------
256
257 listSafeN :: Text -> ([a] -> Maybe [a])
258 -> Text -> [a] -> [a]
259 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
260 where
261 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
262
263 tail' :: Text -> [a] -> [a]
264 tail' = listSafeN "tail" tailMay
265
266 init' :: Text -> [a] -> [a]
267 init' = listSafeN "init" initMay
268
269 ------------------------------------------------------------------------
270 --- Some Statistics sugar functions
271 -- Exponential Average
272 eavg :: [Double] -> Double
273 eavg (x:xs) = a*x + (1-a)*(eavg xs)
274 where a = 0.70
275 eavg [] = 0
276
277 -- Simple Average
278 mean :: Fractional a => [a] -> a
279 mean xs = sum xs / fromIntegral (length xs)
280
281 sumMaybe :: Num a => [Maybe a] -> Maybe a
282 sumMaybe = fmap sum . M.sequence
283
284 variance :: Floating a => [a] -> a
285 variance xs = sum ys / (fromIntegral (length xs) - 1)
286 where
287 m = mean xs
288 ys = map (\x -> (x - m) ** 2) xs
289
290 deviation :: Floating a => [a] -> a
291 deviation = sqrt . variance
292
293 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
294 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
295
296 -----------------------------------------------------------------------
297 -----------------------------------------------------------------------
298 --- Map in Map = Map2
299 -- To avoid Map (a,a) b
300 type Map2 a b = Map a (Map a b)
301
302 lookup2 :: Ord a
303 => a
304 -> a
305 -> Map2 a b
306 -> Maybe b
307 lookup2 a b m = do
308 m' <- lookup a m
309 lookup b m'
310
311 -----------------------------------------------------------------------
312 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
313 foldM' _ z [] = return z
314 foldM' f z (x:xs) = do
315 z' <- f z x
316 z' `seq` foldM' f z' xs
317
318 -----------------------------------------------------------------------
319
320 instance Monoid Double where
321 mempty = 0
322
323 instance Semigroup Double where
324 (<>) a b = a * b
325