]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[FACTORING] G.Text.Terms.
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
14
15 {-# LANGUAGE FlexibleContexts #-}
16 {-# LANGUAGE NoImplicitPrelude #-}
17 {-# LANGUAGE OverloadedStrings #-}
18 {-# LANGUAGE RankNTypes #-}
19
20 module Gargantext.Prelude
21 ( module Gargantext.Prelude
22 , module Protolude
23 , headMay, lastMay
24 , module GHC.Err.Located
25 , module Text.Show
26 , module Text.Read
27 , cs
28 , module Data.Maybe
29 , round
30 , sortWith
31 , module Prelude
32 , MonadBase(..)
33 )
34 where
35
36 import Control.Monad.Base (MonadBase(..))
37 import GHC.Exts (sortWith)
38 import GHC.Err.Located (undefined)
39 import GHC.Real (round)
40 import Data.Maybe (isJust, fromJust, maybe)
41 import Data.Text (Text)
42 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
43 , Fractional, Num, Maybe(Just,Nothing)
44 , Enum, Bounded, Float
45 , Floating, Char, IO
46 , pure, (>>=), (=<<), (<*>), (<$>), (>>)
47 , head, flip
48 , Ord, Integral, Foldable, RealFrac, Monad, filter
49 , reverse, map, mapM, zip, drop, take, zipWith
50 , sum, fromIntegral, length, fmap, foldl, foldl'
51 , takeWhile, sqrt, identity
52 , abs, min, max, maximum, minimum, return, snd, truncate
53 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
54 , Eq, (==), (>=), (<=), (<>), (/=)
55 , (&&), (||), not, any, all
56 , concatMap
57 , fst, snd, toS
58 , elem, die, mod, div, const, either
59 , curry, uncurry, repeat
60 , otherwise, when
61 , IO()
62 , compare
63 , on
64 , panic
65 )
66
67 import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
68 -- TODO import functions optimized in Utils.Count
69 -- import Protolude hiding (head, last, all, any, sum, product, length)
70 -- import Gargantext.Utils.Count
71 import qualified Data.List as L hiding (head, sum)
72 import qualified Control.Monad as M
73
74 import Data.Map (Map)
75 import qualified Data.Map as M
76
77 import Data.Map.Strict (insertWith)
78 import qualified Data.Vector as V
79 import Safe (headMay, lastMay, initMay, tailMay)
80 import Text.Show (Show(), show)
81 import Text.Read (Read())
82 import Data.String.Conversions (cs)
83
84
85 printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
86 printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
87 -- printDebug _ _ = pure ()
88
89
90 -- | splitEvery n == chunkAlong n n
91 splitEvery :: Int -> [a] -> [[a]]
92 splitEvery _ [] = []
93 splitEvery n xs =
94 let (h,t) = L.splitAt n xs
95 in h : splitEvery n t
96
97 type Grain = Int
98 type Step = Int
99
100 -- | Function to split a range into chunks
101 -- if step == grain then linearity (splitEvery)
102 -- elif step < grain then overlapping
103 -- else dotted with holes
104 -- TODO FIX BUG if Steps*Grain /= length l
105 -- chunkAlong 10 10 [1..15] == [1..10]
106 -- BUG: what about the rest of (divMod 15 10)?
107 -- TODO: chunkAlongNoRest or chunkAlongWithRest
108 -- default behavior: NoRest
109
110 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
111 chunkAlong a b l = case a >= length l of
112 True -> [l]
113 False -> chunkAlong' a b l
114
115 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
116 chunkAlong' a b l = case a > 0 && b > 0 of
117 True -> chunkAlong'' a b l
118 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
119
120 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
121 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
122 where
123 only = map (take a)
124 while = takeWhile (\x -> length x >= a)
125 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
126
127 -- | Optimized version (Vector)
128 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
129 chunkAlongV a b l = only (while dropAlong)
130 where
131 only = V.map (V.take a)
132 while = V.takeWhile (\x -> V.length x >= a)
133 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
134
135 -- | TODO Inverse of chunk ? unchunkAlong ?
136 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
137 -- unchunkAlong = undefined
138
139
140 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
141 splitAlong :: [Int] -> [Char] -> [[Char]]
142 splitAlong _ [] = [] -- No list? done
143 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
144 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
145 -- take until our split spot, recurse with next split spot and list remainder
146
147 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
148 takeWhileM _ [] = return []
149 takeWhileM p (a:as) = do
150 v <- a
151 if p v
152 then do
153 vs <- takeWhileM p as
154 return (v:vs)
155 else return []
156
157 -- SUMS
158 -- To select the right algorithme according to the type:
159 -- https://github.com/mikeizbicki/ifcxt
160
161 sumSimple :: Num a => [a] -> a
162 sumSimple = L.foldl' (+) 0
163
164 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
165 sumKahan :: Num a => [a] -> a
166 sumKahan = snd . L.foldl' go (0,0)
167 where
168 go (c,t) i = ((t'-t)-y,t')
169 where
170 y = i-c
171 t' = t+y
172
173 -- | compute part of the dict
174 count2map :: (Ord k, Foldable t) => t k -> Map k Double
175 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
176
177 -- | insert in a dict
178 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
179 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
180
181 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
182 trunc n = truncate . (* 10^n)
183
184 trunc' :: Int -> Double -> Double
185 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
186
187 ------------------------------------------------------------------------
188 bool2num :: Num a => Bool -> a
189 bool2num True = 1
190 bool2num False = 0
191
192 bool2double :: Bool -> Double
193 bool2double = bool2num
194
195 bool2int :: Bool -> Int
196 bool2int = bool2num
197 ------------------------------------------------------------------------
198
199 -- Normalizing && scaling data
200 scale :: [Double] -> [Double]
201 scale = scaleMinMax
202
203 scaleMinMax :: [Double] -> [Double]
204 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
205 where
206 ma = maximum xs'
207 mi = minimum xs'
208 xs' = map abs xs
209
210 scaleNormalize :: [Double] -> [Double]
211 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
212 where
213 v = variance xs'
214 m = mean xs'
215 xs' = map abs xs
216
217 normalize :: [Double] -> [Double]
218 normalize as = normalizeWith identity as
219
220 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
221 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
222 where
223 bs' = map extract bs
224
225 -- Zip functions to add
226 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
227 zipFst f xs = zip (f xs) xs
228
229 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
230 zipSnd f xs = zip xs (f xs)
231
232 -- | maximumWith
233 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
234 maximumWith f = L.maximumBy (compare `on` f)
235
236 -- | To get all combinations of a list with no
237 -- repetition and apply a function to the resulting list of pairs
238 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
239 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
240
241 ------------------------------------------------------------------------
242 -- Empty List Sugar Error Handling
243 -- TODO add Garg Monad Errors
244
245 listSafe1 :: Text -> ([a] -> Maybe a)
246 -> Text -> [a] -> a
247 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
248 where
249 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
250
251 head' :: Text -> [a] -> a
252 head' = listSafe1 "head" headMay
253
254 last' :: Text -> [a] -> a
255 last' = listSafe1 "last" lastMay
256
257 ------------------------------------------------------------------------
258
259 listSafeN :: Text -> ([a] -> Maybe [a])
260 -> Text -> [a] -> [a]
261 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
262 where
263 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
264
265 tail' :: Text -> [a] -> [a]
266 tail' = listSafeN "tail" tailMay
267
268 init' :: Text -> [a] -> [a]
269 init' = listSafeN "init" initMay
270
271 ------------------------------------------------------------------------
272 --- Some Statistics sugar functions
273 -- Exponential Average
274 eavg :: [Double] -> Double
275 eavg (x:xs) = a*x + (1-a)*(eavg xs)
276 where a = 0.70
277 eavg [] = 0
278
279 -- Simple Average
280 mean :: Fractional a => [a] -> a
281 mean xs = sum xs / fromIntegral (length xs)
282
283 sumMaybe :: Num a => [Maybe a] -> Maybe a
284 sumMaybe = fmap sum . M.sequence
285
286 variance :: Floating a => [a] -> a
287 variance xs = sum ys / (fromIntegral (length xs) - 1)
288 where
289 m = mean xs
290 ys = map (\x -> (x - m) ** 2) xs
291
292 deviation :: Floating a => [a] -> a
293 deviation = sqrt . variance
294
295 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
296 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
297
298 ma :: [Double] -> [Double]
299 ma = movingAverage 3
300
301
302 -----------------------------------------------------------------------
303 fib :: Int -> Int
304 fib 0 = 0
305 fib 1 = 1
306 fib n = fib (n-1) + fib (n-2)