]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[ngrams] version API endpoint for ngrams table
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14
15
16 module Gargantext.Prelude
17 ( module Gargantext.Prelude
18 , module Protolude
19 , headMay, lastMay
20 , module GHC.Err.Located
21 , module Text.Show
22 , module Text.Read
23 , cs
24 , module Data.Maybe
25 , round
26 , sortWith
27 , module Prelude
28 , MonadBase(..)
29 )
30 where
31
32 import Control.Monad.Base (MonadBase(..))
33 import GHC.Exts (sortWith)
34 import GHC.Err.Located (undefined)
35 import GHC.Real (round)
36 import Data.Map (Map, lookup)
37 import Data.Maybe (isJust, fromJust, maybe)
38 import Data.Text (Text)
39 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
40 , Fractional, Num, Maybe(Just,Nothing)
41 , Enum, Bounded, Float
42 , Floating, Char, IO
43 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
44 , head, flip
45 , Ord, Integral, Foldable, RealFrac, Monad, filter
46 , reverse, map, mapM, zip, drop, take, zipWith
47 , sum, fromIntegral, length, fmap, foldl, foldl'
48 , takeWhile, sqrt, identity
49 , abs, min, max, maximum, minimum, return, snd, truncate
50 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
51 , Eq, (==), (>=), (<=), (<>), (/=)
52 , (&&), (||), not, any, all
53 , concatMap
54 , fst, snd, toS
55 , elem, die, mod, div, const, either
56 , curry, uncurry, repeat
57 , otherwise, when
58 , IO()
59 , compare
60 , on
61 , panic
62 , seq
63 )
64
65 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
66 -- TODO import functions optimized in Utils.Count
67 -- import Protolude hiding (head, last, all, any, sum, product, length)
68 -- import Gargantext.Utils.Count
69 import qualified Data.List as L hiding (head, sum)
70 import qualified Control.Monad as M
71 import qualified Data.Map as M
72 import Data.Map.Strict (insertWith)
73 import qualified Data.Vector as V
74 import Safe (headMay, lastMay, initMay, tailMay)
75 import Text.Show (Show(), show)
76 import Text.Read (Read())
77 import Data.String.Conversions (cs)
78
79
80 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
81 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
82 -- printDebug _ _ = pure ()
83
84
85 -- | splitEvery n == chunkAlong n n
86 splitEvery :: Int -> [a] -> [[a]]
87 splitEvery _ [] = []
88 splitEvery n xs =
89 let (h,t) = L.splitAt n xs
90 in h : splitEvery n t
91
92 type Grain = Int
93 type Step = Int
94
95 -- | Function to split a range into chunks
96 -- if step == grain then linearity (splitEvery)
97 -- elif step < grain then overlapping
98 -- else dotted with holes
99 -- TODO FIX BUG if Steps*Grain /= length l
100 -- chunkAlong 10 10 [1..15] == [1..10]
101 -- BUG: what about the rest of (divMod 15 10)?
102 -- TODO: chunkAlongNoRest or chunkAlongWithRest
103 -- default behavior: NoRest
104
105 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
106 chunkAlong a b l = case a >= length l of
107 True -> [l]
108 False -> chunkAlong' a b l
109
110 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
111 chunkAlong' a b l = case a > 0 && b > 0 of
112 True -> chunkAlong'' a b l
113 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
114
115 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
116 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
117 where
118 only = map (take a)
119 while = takeWhile (\x -> length x >= a)
120 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
121
122 -- | Optimized version (Vector)
123 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
124 chunkAlongV a b l = only (while dropAlong)
125 where
126 only = V.map (V.take a)
127 while = V.takeWhile (\x -> V.length x >= a)
128 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
129
130 -- | TODO Inverse of chunk ? unchunkAlong ?
131 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
132 -- unchunkAlong = undefined
133
134
135 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
136 splitAlong :: [Int] -> [Char] -> [[Char]]
137 splitAlong _ [] = [] -- No list? done
138 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
139 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
140 -- take until our split spot, recurse with next split spot and list remainder
141
142 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
143 takeWhileM _ [] = return []
144 takeWhileM p (a:as) = do
145 v <- a
146 if p v
147 then do
148 vs <- takeWhileM p as
149 return (v:vs)
150 else return []
151
152 -- SUMS
153 -- To select the right algorithme according to the type:
154 -- https://github.com/mikeizbicki/ifcxt
155
156 sumSimple :: Num a => [a] -> a
157 sumSimple = L.foldl' (+) 0
158
159 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
160 sumKahan :: Num a => [a] -> a
161 sumKahan = snd . L.foldl' go (0,0)
162 where
163 go (c,t) i = ((t'-t)-y,t')
164 where
165 y = i-c
166 t' = t+y
167
168 -- | compute part of the dict
169 count2map :: (Ord k, Foldable t) => t k -> Map k Double
170 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
171
172 -- | insert in a dict
173 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
174 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
175
176 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
177 trunc n = truncate . (* 10^n)
178
179 trunc' :: Int -> Double -> Double
180 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
181
182 ------------------------------------------------------------------------
183 bool2num :: Num a => Bool -> a
184 bool2num True = 1
185 bool2num False = 0
186
187 bool2double :: Bool -> Double
188 bool2double = bool2num
189
190 bool2int :: Bool -> Int
191 bool2int = bool2num
192 ------------------------------------------------------------------------
193
194 -- Normalizing && scaling data
195 scale :: [Double] -> [Double]
196 scale = scaleMinMax
197
198 scaleMinMax :: [Double] -> [Double]
199 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
200 where
201 ma = maximum xs'
202 mi = minimum xs'
203 xs' = map abs xs
204
205 scaleNormalize :: [Double] -> [Double]
206 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
207 where
208 v = variance xs'
209 m = mean xs'
210 xs' = map abs xs
211
212 normalize :: [Double] -> [Double]
213 normalize as = normalizeWith identity as
214
215 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
216 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
217 where
218 bs' = map extract bs
219
220 -- Zip functions to add
221 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
222 zipFst f xs = zip (f xs) xs
223
224 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
225 zipSnd f xs = zip xs (f xs)
226
227 -- | maximumWith
228 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
229 maximumWith f = L.maximumBy (compare `on` f)
230
231 -- | To get all combinations of a list with no
232 -- repetition and apply a function to the resulting list of pairs
233 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
234 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
235
236 ------------------------------------------------------------------------
237 -- Empty List Sugar Error Handling
238 -- TODO add Garg Monad Errors
239
240 listSafe1 :: Text -> ([a] -> Maybe a)
241 -> Text -> [a] -> a
242 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
243 where
244 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
245
246 head' :: Text -> [a] -> a
247 head' = listSafe1 "head" headMay
248
249 last' :: Text -> [a] -> a
250 last' = listSafe1 "last" lastMay
251
252 ------------------------------------------------------------------------
253
254 listSafeN :: Text -> ([a] -> Maybe [a])
255 -> Text -> [a] -> [a]
256 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
257 where
258 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
259
260 tail' :: Text -> [a] -> [a]
261 tail' = listSafeN "tail" tailMay
262
263 init' :: Text -> [a] -> [a]
264 init' = listSafeN "init" initMay
265
266 ------------------------------------------------------------------------
267 --- Some Statistics sugar functions
268 -- Exponential Average
269 eavg :: [Double] -> Double
270 eavg (x:xs) = a*x + (1-a)*(eavg xs)
271 where a = 0.70
272 eavg [] = 0
273
274 -- Simple Average
275 mean :: Fractional a => [a] -> a
276 mean xs = sum xs / fromIntegral (length xs)
277
278 sumMaybe :: Num a => [Maybe a] -> Maybe a
279 sumMaybe = fmap sum . M.sequence
280
281 variance :: Floating a => [a] -> a
282 variance xs = sum ys / (fromIntegral (length xs) - 1)
283 where
284 m = mean xs
285 ys = map (\x -> (x - m) ** 2) xs
286
287 deviation :: Floating a => [a] -> a
288 deviation = sqrt . variance
289
290 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
291 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
292
293 ma :: [Double] -> [Double]
294 ma = movingAverage 3
295
296 -----------------------------------------------------------------------
297 -----------------------------------------------------------------------
298 --- Map in Map = Map2
299 -- To avoid Map (a,a) b
300 type Map2 a b = Map a (Map a b)
301
302 lookup2 :: Ord a
303 => a
304 -> a
305 -> Map2 a b
306 -> Maybe b
307 lookup2 a b m = do
308 m' <- lookup a m
309 lookup b m'
310
311 -----------------------------------------------
312
313 foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
314 foldM' _ z [] = return z
315 foldM' f z (x:xs) = do
316 z' <- f z x
317 z' `seq` foldM' f z' xs
318
319