]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[Warnings+FEAT] insert for new created ngrams
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
15
16 module Gargantext.Prelude
17 ( module Gargantext.Prelude
18 , module Protolude
19 , headMay, lastMay
20 , module GHC.Err.Located
21 , module Text.Show
22 , module Text.Read
23 , cs
24 , module Data.Maybe
25 , round
26 , sortWith
27 , module Prelude
28 , MonadBase(..)
29 , Typeable
30 )
31 where
32
33 import Control.Monad.Base (MonadBase(..))
34 import GHC.Exts (sortWith)
35 import GHC.Err.Located (undefined)
36 import GHC.Real (round)
37 import Data.Map (Map, lookup)
38 import Data.Maybe (isJust, fromJust, maybe)
39 import Data.Monoid (Monoid, mempty)
40 import Data.Semigroup (Semigroup, (<>))
41 import Data.Text (Text)
42 import Data.Typeable (Typeable)
43 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
44 , Fractional, Num, Maybe(Just,Nothing)
45 , Enum, Bounded, Float
46 , Floating, Char, IO
47 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
48 , head, flip
49 , Ord, Integral, Foldable, RealFrac, Monad, filter
50 , reverse, map, mapM, zip, drop, take, zipWith
51 , sum, fromIntegral, length, fmap, foldl, foldl'
52 , takeWhile, sqrt, identity
53 , abs, min, max, maximum, minimum, return, snd, truncate
54 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
55 , Eq, (==), (>=), (<=), (<>), (/=)
56 , (&&), (||), not, any, all
57 , concatMap
58 , fst, snd, toS
59 , elem, die, mod, div, const, either
60 , curry, uncurry, repeat
61 , otherwise, when
62 , IO()
63 , compare
64 , on
65 , panic
66 , seq
67 )
68
69 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
70 -- TODO import functions optimized in Utils.Count
71 -- import Protolude hiding (head, last, all, any, sum, product, length)
72 -- import Gargantext.Utils.Count
73 import qualified Data.List as L hiding (head, sum)
74 import qualified Control.Monad as M
75 import qualified Data.Map as M
76 import Data.Map.Strict (insertWith)
77 import qualified Data.Vector as V
78 import Safe (headMay, lastMay, initMay, tailMay)
79 import Text.Show (Show(), show)
80 import Text.Read (Read())
81 import Data.String.Conversions (cs)
82
83
84 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
85 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
86 -- printDebug _ _ = pure ()
87
88
89 -- | splitEvery n == chunkAlong n n
90 splitEvery :: Int -> [a] -> [[a]]
91 splitEvery _ [] = []
92 splitEvery n xs =
93 let (h,t) = L.splitAt n xs
94 in h : splitEvery n t
95
96 type Grain = Int
97 type Step = Int
98
99 -- | Function to split a range into chunks
100 -- if step == grain then linearity (splitEvery)
101 -- elif step < grain then overlapping
102 -- else dotted with holes
103 -- TODO FIX BUG if Steps*Grain /= length l
104 -- chunkAlong 10 10 [1..15] == [1..10]
105 -- BUG: what about the rest of (divMod 15 10)?
106 -- TODO: chunkAlongNoRest or chunkAlongWithRest
107 -- default behavior: NoRest
108
109 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
110 chunkAlong a b l = case a >= length l of
111 True -> [l]
112 False -> chunkAlong' a b l
113
114 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
115 chunkAlong' a b l = case a > 0 && b > 0 of
116 True -> chunkAlong'' a b l
117 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
118
119 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
120 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
121 where
122 only = map (take a)
123 while = takeWhile (\x -> length x >= a)
124 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
125
126 -- | Optimized version (Vector)
127 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
128 chunkAlongV a b l = only (while dropAlong)
129 where
130 only = V.map (V.take a)
131 while = V.takeWhile (\x -> V.length x >= a)
132 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
133
134 -- | TODO Inverse of chunk ? unchunkAlong ?
135 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
136 -- unchunkAlong = undefined
137
138
139 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
140 splitAlong :: [Int] -> [Char] -> [[Char]]
141 splitAlong _ [] = [] -- No list? done
142 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
143 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
144 -- take until our split spot, recurse with next split spot and list remainder
145
146 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
147 takeWhileM _ [] = return []
148 takeWhileM p (a:as) = do
149 v <- a
150 if p v
151 then do
152 vs <- takeWhileM p as
153 return (v:vs)
154 else return []
155
156 -- SUMS
157 -- To select the right algorithme according to the type:
158 -- https://github.com/mikeizbicki/ifcxt
159
160 sumSimple :: Num a => [a] -> a
161 sumSimple = L.foldl' (+) 0
162
163 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
164 sumKahan :: Num a => [a] -> a
165 sumKahan = snd . L.foldl' go (0,0)
166 where
167 go (c,t) i = ((t'-t)-y,t')
168 where
169 y = i-c
170 t' = t+y
171
172 -- | compute part of the dict
173 count2map :: (Ord k, Foldable t) => t k -> Map k Double
174 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
175
176 -- | insert in a dict
177 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
178 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
179
180 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
181 trunc n = truncate . (* 10^n)
182
183 trunc' :: Int -> Double -> Double
184 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
185
186 ------------------------------------------------------------------------
187 bool2num :: Num a => Bool -> a
188 bool2num True = 1
189 bool2num False = 0
190
191 bool2double :: Bool -> Double
192 bool2double = bool2num
193
194 bool2int :: Bool -> Int
195 bool2int = bool2num
196 ------------------------------------------------------------------------
197
198 -- Normalizing && scaling data
199 scale :: [Double] -> [Double]
200 scale = scaleMinMax
201
202 scaleMinMax :: [Double] -> [Double]
203 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
204 where
205 ma = maximum xs'
206 mi = minimum xs'
207 xs' = map abs xs
208
209 scaleNormalize :: [Double] -> [Double]
210 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
211 where
212 v = variance xs'
213 m = mean xs'
214 xs' = map abs xs
215
216 normalize :: [Double] -> [Double]
217 normalize as = normalizeWith identity as
218
219 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
220 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
221 where
222 bs' = map extract bs
223
224 -- Zip functions to add
225 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
226 zipFst f xs = zip (f xs) xs
227
228 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
229 zipSnd f xs = zip xs (f xs)
230
231 -- | maximumWith
232 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
233 maximumWith f = L.maximumBy (compare `on` f)
234
235 -- | To get all combinations of a list with no
236 -- repetition and apply a function to the resulting list of pairs
237 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
238 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
239
240 ------------------------------------------------------------------------
241 -- Empty List Sugar Error Handling
242 -- TODO add Garg Monad Errors
243
244 listSafe1 :: Text -> ([a] -> Maybe a)
245 -> Text -> [a] -> a
246 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
247 where
248 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
249
250 head' :: Text -> [a] -> a
251 head' = listSafe1 "head" headMay
252
253 last' :: Text -> [a] -> a
254 last' = listSafe1 "last" lastMay
255
256 ------------------------------------------------------------------------
257
258 listSafeN :: Text -> ([a] -> Maybe [a])
259 -> Text -> [a] -> [a]
260 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
261 where
262 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
263
264 tail' :: Text -> [a] -> [a]
265 tail' = listSafeN "tail" tailMay
266
267 init' :: Text -> [a] -> [a]
268 init' = listSafeN "init" initMay
269
270 ------------------------------------------------------------------------
271 --- Some Statistics sugar functions
272 -- Exponential Average
273 eavg :: [Double] -> Double
274 eavg (x:xs) = a*x + (1-a)*(eavg xs)
275 where a = 0.70
276 eavg [] = 0
277
278 -- Simple Average
279 mean :: Fractional a => [a] -> a
280 mean xs = sum xs / fromIntegral (length xs)
281
282 sumMaybe :: Num a => [Maybe a] -> Maybe a
283 sumMaybe = fmap sum . M.sequence
284
285 variance :: Floating a => [a] -> a
286 variance xs = sum ys / (fromIntegral (length xs) - 1)
287 where
288 m = mean xs
289 ys = map (\x -> (x - m) ** 2) xs
290
291 deviation :: Floating a => [a] -> a
292 deviation = sqrt . variance
293
294 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
295 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
296
297 -----------------------------------------------------------------------
298 -----------------------------------------------------------------------
299 --- Map in Map = Map2
300 -- To avoid Map (a,a) b
301 type Map2 a b = Map a (Map a b)
302
303 lookup2 :: Ord a
304 => a
305 -> a
306 -> Map2 a b
307 -> Maybe b
308 lookup2 a b m = do
309 m' <- lookup a m
310 lookup b m'
311
312 -----------------------------------------------------------------------
313 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
314 foldM' _ z [] = return z
315 foldM' f z (x:xs) = do
316 z' <- f z x
317 z' `seq` foldM' f z' xs
318
319 -----------------------------------------------------------------------
320 -- | Instance for basic numerals
321 -- See the difference between Double and (Int Or Integer)
322 instance Monoid Double where
323 mempty = 1
324
325 instance Semigroup Double where
326 (<>) a b = a * b
327
328 -----------
329 instance Monoid Int where
330 mempty = 0
331
332 instance Semigroup Int where
333 (<>) a b = a + b
334 ----
335 instance Monoid Integer where
336 mempty = 0
337
338 instance Semigroup Integer where
339 (<>) a b = a + b