]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[CLEAN] replace NgramsTerms with Text to avoid ambiguity with Types of Ngrams
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 {-# OPTIONS_GHC -fno-warn-orphans #-}
14 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
15
16 module Gargantext.Prelude
17 ( module Gargantext.Prelude
18 , module Protolude
19 , headMay, lastMay
20 , module GHC.Err.Located
21 , module Text.Show
22 , module Text.Read
23 , cs
24 , module Data.Maybe
25 , round
26 , sortWith
27 , module Prelude
28 , MonadBase(..)
29 , Typeable
30 )
31 where
32
33 import Control.Monad.Base (MonadBase(..))
34 import GHC.Exts (sortWith)
35 import GHC.Err.Located (undefined)
36 import GHC.Real (round)
37 import Data.Map (Map, lookup)
38 import Data.Maybe (isJust, fromJust, maybe)
39 import Data.Monoid (Monoid, mempty)
40 import Data.Semigroup (Semigroup, (<>))
41 import Data.Text (Text)
42 import Data.Typeable (Typeable)
43 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
44 , Fractional, Num, Maybe(Just,Nothing)
45 , Enum, Bounded, Float
46 , Floating, Char, IO
47 , Functor(..)
48 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
49 , head, flip
50 , Ord, Integral, Foldable, RealFrac, Monad, filter
51 , reverse, map, mapM, zip, drop, take, zipWith
52 , sum, fromIntegral, length, fmap, foldl, foldl'
53 , takeWhile, sqrt, identity
54 , abs, min, max, maximum, minimum, return, snd, truncate
55 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
56 , Eq, (==), (>=), (<=), (<>), (/=), xor
57 , (&&), (||), not, any, all
58 , concatMap
59 , fst, snd, toS
60 , elem, die, mod, div, const, either
61 , curry, uncurry, repeat
62 , otherwise, when
63 , IO()
64 , compare
65 , on
66 , panic
67 , seq
68 )
69
70 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
71 -- TODO import functions optimized in Utils.Count
72 -- import Protolude hiding (head, last, all, any, sum, product, length)
73 -- import Gargantext.Utils.Count
74 import qualified Data.List as L hiding (head, sum)
75 import qualified Control.Monad as M
76 import qualified Data.Map as M
77 import Data.Map.Strict (insertWith)
78 import qualified Data.Vector as V
79 import Safe (headMay, lastMay, initMay, tailMay)
80 import Text.Show (Show(), show)
81 import Text.Read (Read())
82 import Data.String.Conversions (cs)
83
84
85 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
86 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
87 -- printDebug _ _ = pure ()
88
89
90 -- | splitEvery n == chunkAlong n n
91 splitEvery :: Int -> [a] -> [[a]]
92 splitEvery _ [] = []
93 splitEvery n xs =
94 let (h,t) = L.splitAt n xs
95 in h : splitEvery n t
96
97 type Grain = Int
98 type Step = Int
99
100 -- | Function to split a range into chunks
101 -- if step == grain then linearity (splitEvery)
102 -- elif step < grain then overlapping
103 -- else dotted with holes
104 -- TODO FIX BUG if Steps*Grain /= length l
105 -- chunkAlong 10 10 [1..15] == [1..10]
106 -- BUG: what about the rest of (divMod 15 10)?
107 -- TODO: chunkAlongNoRest or chunkAlongWithRest
108 -- default behavior: NoRest
109
110 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
111 chunkAlong a b l = case a >= length l of
112 True -> [l]
113 False -> chunkAlong' a b l
114
115 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
116 chunkAlong' a b l = case a > 0 && b > 0 of
117 True -> chunkAlong'' a b l
118 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
119
120 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
121 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
122 where
123 only = map (take a)
124 while = takeWhile (\x -> length x >= a)
125 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
126
127 -- | Optimized version (Vector)
128 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
129 chunkAlongV a b l = only (while dropAlong)
130 where
131 only = V.map (V.take a)
132 while = V.takeWhile (\x -> V.length x >= a)
133 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
134
135 -- | TODO Inverse of chunk ? unchunkAlong ?
136 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
137 -- unchunkAlong = undefined
138
139
140 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
141 splitAlong :: [Int] -> [Char] -> [[Char]]
142 splitAlong _ [] = [] -- No list? done
143 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
144 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
145 -- take until our split spot, recurse with next split spot and list remainder
146
147 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
148 takeWhileM _ [] = return []
149 takeWhileM p (a:as) = do
150 v <- a
151 if p v
152 then do
153 vs <- takeWhileM p as
154 return (v:vs)
155 else return []
156
157 -- SUMS
158 -- To select the right algorithme according to the type:
159 -- https://github.com/mikeizbicki/ifcxt
160
161 sumSimple :: Num a => [a] -> a
162 sumSimple = L.foldl' (+) 0
163
164 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
165 sumKahan :: Num a => [a] -> a
166 sumKahan = snd . L.foldl' go (0,0)
167 where
168 go (c,t) i = ((t'-t)-y,t')
169 where
170 y = i-c
171 t' = t+y
172
173 -- | compute part of the dict
174 count2map :: (Ord k, Foldable t) => t k -> Map k Double
175 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
176
177 -- | insert in a dict
178 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
179 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
180
181 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
182 trunc n = truncate . (* 10^n)
183
184 trunc' :: Int -> Double -> Double
185 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
186
187 ------------------------------------------------------------------------
188 bool2num :: Num a => Bool -> a
189 bool2num True = 1
190 bool2num False = 0
191
192 bool2double :: Bool -> Double
193 bool2double = bool2num
194
195 bool2int :: Bool -> Int
196 bool2int = bool2num
197 ------------------------------------------------------------------------
198
199 -- Normalizing && scaling data
200 scale :: [Double] -> [Double]
201 scale = scaleMinMax
202
203 scaleMinMax :: [Double] -> [Double]
204 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
205 where
206 ma = maximum xs'
207 mi = minimum xs'
208 xs' = map abs xs
209
210 scaleNormalize :: [Double] -> [Double]
211 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
212 where
213 v = variance xs'
214 m = mean xs'
215 xs' = map abs xs
216
217 normalize :: [Double] -> [Double]
218 normalize as = normalizeWith identity as
219
220 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
221 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
222 where
223 bs' = map extract bs
224
225 -- Zip functions to add
226 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
227 zipFst f xs = zip (f xs) xs
228
229 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
230 zipSnd f xs = zip xs (f xs)
231
232 -- | maximumWith
233 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
234 maximumWith f = L.maximumBy (compare `on` f)
235
236 -- | To get all combinations of a list with no
237 -- repetition and apply a function to the resulting list of pairs
238 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
239 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
240
241 ------------------------------------------------------------------------
242 -- Empty List Sugar Error Handling
243 -- TODO add Garg Monad Errors
244
245 listSafe1 :: Text -> ([a] -> Maybe a)
246 -> Text -> [a] -> a
247 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
248 where
249 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
250
251 head' :: Text -> [a] -> a
252 head' = listSafe1 "head" headMay
253
254 last' :: Text -> [a] -> a
255 last' = listSafe1 "last" lastMay
256
257 ------------------------------------------------------------------------
258
259 listSafeN :: Text -> ([a] -> Maybe [a])
260 -> Text -> [a] -> [a]
261 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
262 where
263 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
264
265 tail' :: Text -> [a] -> [a]
266 tail' = listSafeN "tail" tailMay
267
268 init' :: Text -> [a] -> [a]
269 init' = listSafeN "init" initMay
270
271 ------------------------------------------------------------------------
272 --- Some Statistics sugar functions
273 -- Exponential Average
274 eavg :: [Double] -> Double
275 eavg (x:xs) = a*x + (1-a)*(eavg xs)
276 where a = 0.70
277 eavg [] = 0
278
279 -- Simple Average
280 mean :: Fractional a => [a] -> a
281 mean xs = sum xs / fromIntegral (length xs)
282
283 sumMaybe :: Num a => [Maybe a] -> Maybe a
284 sumMaybe = fmap sum . M.sequence
285
286 variance :: Floating a => [a] -> a
287 variance xs = sum ys / (fromIntegral (length xs) - 1)
288 where
289 m = mean xs
290 ys = map (\x -> (x - m) ** 2) xs
291
292 deviation :: Floating a => [a] -> a
293 deviation = sqrt . variance
294
295 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
296 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
297
298 -----------------------------------------------------------------------
299 -----------------------------------------------------------------------
300 --- Map in Map = Map2
301 -- To avoid Map (a,a) b
302 type Map2 a b = Map a (Map a b)
303
304 lookup2 :: Ord a
305 => a
306 -> a
307 -> Map2 a b
308 -> Maybe b
309 lookup2 a b m = do
310 m' <- lookup a m
311 lookup b m'
312
313 -----------------------------------------------------------------------
314 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
315 foldM' _ z [] = return z
316 foldM' f z (x:xs) = do
317 z' <- f z x
318 z' `seq` foldM' f z' xs
319
320 -----------------------------------------------------------------------
321 -- | Instance for basic numerals
322 -- See the difference between Double and (Int Or Integer)
323 instance Monoid Double where
324 mempty = 1
325
326 instance Semigroup Double where
327 (<>) a b = a * b
328
329 -----------
330 instance Monoid Int where
331 mempty = 0
332
333 instance Semigroup Int where
334 (<>) a b = a + b
335 ----
336 instance Monoid Integer where
337 mempty = 0
338
339 instance Semigroup Integer where
340 (<>) a b = a + b