]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[ELEVE] Ngrams, still NaN.
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
15 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
16
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19 {-# LANGUAGE RankNTypes #-}
20
21 module Gargantext.Prelude
22 ( module Gargantext.Prelude
23 , module Protolude
24 , headMay, lastMay
25 , module GHC.Err.Located
26 , module Text.Show
27 , module Text.Read
28 , cs
29 , module Data.Maybe
30 , round
31 , sortWith
32 )
33 where
34
35 import GHC.Exts (sortWith)
36 import GHC.Err.Located (undefined)
37 import GHC.Real (round)
38 import Control.Monad.IO.Class (MonadIO)
39 import Data.Maybe (isJust, fromJust, maybe)
40 import Data.Text (Text)
41 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
42 , Fractional, Num, Maybe(Just,Nothing)
43 , Enum, Bounded, Float
44 , Floating, Char, IO
45 , pure, (>>=), (=<<), (<*>), (<$>)
46 , putStrLn
47 , head, flip
48 , Ord, Integral, Foldable, RealFrac, Monad, filter
49 , reverse, map, mapM, zip, drop, take, zipWith
50 , sum, fromIntegral, length, fmap, foldl, foldl'
51 , takeWhile, sqrt, identity
52 , abs, min, max, maximum, minimum, return, snd, truncate
53 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
54 , Eq, (==), (>=), (<=), (<>), (/=)
55 , (&&), (||), not, any, all
56 , fst, snd, toS
57 , elem, die, mod, div, const, either
58 , curry, uncurry, repeat
59 , otherwise, when
60 , IO()
61 , compare
62 , on
63 , panic
64 )
65
66 -- TODO import functions optimized in Utils.Count
67 -- import Protolude hiding (head, last, all, any, sum, product, length)
68 -- import Gargantext.Utils.Count
69 import qualified Data.List as L hiding (head, sum)
70 import qualified Control.Monad as M
71
72 import Data.Map (Map)
73 import qualified Data.Map as M
74
75 import Data.Map.Strict (insertWith)
76 import qualified Data.Vector as V
77 import Safe (headMay, lastMay, initMay, tailMay)
78 import Text.Show (Show(), show)
79 import Text.Read (Read())
80 import Data.String.Conversions (cs)
81
82
83 printDebug :: (Show a, MonadIO m) => [Char] -> a -> m ()
84 printDebug msg x = putStrLn $ msg <> " " <> show x
85 -- printDebug _ _ = pure ()
86
87
88 map2 :: (t -> b) -> [[t]] -> [[b]]
89 map2 fun = map (map fun)
90
91
92 -- Some Statistics sugar functions
93 -- Exponential Average
94 eavg :: [Double] -> Double
95 eavg (x:xs) = a*x + (1-a)*(eavg xs)
96 where a = 0.70
97 eavg [] = 0
98
99 -- Simple Average
100 mean :: Fractional a => [a] -> a
101 mean xs = sum xs / fromIntegral (length xs)
102
103
104 sumMaybe :: Num a => [Maybe a] -> Maybe a
105 sumMaybe = fmap sum . M.sequence
106
107 variance :: Floating a => [a] -> a
108 variance xs = sum ys / (fromIntegral (length xs) - 1)
109 where
110 m = mean xs
111 ys = map (\x -> (x - m) ** 2) xs
112
113
114 deviation :: Floating a => [a] -> a
115 deviation = sqrt . variance
116
117 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
118 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
119
120 ma :: [Double] -> [Double]
121 ma = movingAverage 3
122
123 -- | splitEvery n == chunkAlong n n
124 splitEvery :: Int -> [a] -> [[a]]
125 splitEvery _ [] = []
126 splitEvery n xs =
127 let (h,t) = L.splitAt n xs
128 in h : splitEvery n t
129
130 type Grain = Int
131 type Step = Int
132
133 -- | Function to split a range into chunks
134 -- if step == grain then linearity (splitEvery)
135 -- elif step < grain then overlapping
136 -- else dotted with holes
137 -- TODO FIX BUG if Steps*Grain /= length l
138 -- chunkAlong 10 10 [1..15] == [1..10]
139 -- BUG: what about the rest of (divMod 15 10)?
140 -- TODO: chunkAlongNoRest or chunkAlongWithRest
141 -- default behavior: NoRest
142
143 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
144 chunkAlong a b l = case a >= length l of
145 True -> [l]
146 False -> chunkAlong' a b l
147
148 chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
149 chunkAlong' a b l = case a > 0 && b > 0 of
150 True -> chunkAlong'' a b l
151 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
152
153 chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
154 chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
155 where
156 only = map (take a)
157 while = takeWhile (\x -> length x >= a)
158 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
159
160 -- | Optimized version (Vector)
161 chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
162 chunkAlongV a b l = only (while dropAlong)
163 where
164 only = V.map (V.take a)
165 while = V.takeWhile (\x -> V.length x >= a)
166 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
167
168 -- | TODO Inverse of chunk ? unchunkAlong ?
169 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
170 -- unchunkAlong = undefined
171
172
173 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
174 splitAlong :: [Int] -> [Char] -> [[Char]]
175 splitAlong _ [] = [] -- No list? done
176 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
177 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys) -- take until our split spot, recurse with next split spot and list remainder
178
179 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
180 takeWhileM _ [] = return []
181 takeWhileM p (a:as) = do
182 v <- a
183 if p v
184 then do
185 vs <- takeWhileM p as
186 return (v:vs)
187 else return []
188
189 -- SUMS
190 -- To select the right algorithme according to the type:
191 -- https://github.com/mikeizbicki/ifcxt
192
193 sumSimple :: Num a => [a] -> a
194 sumSimple = L.foldl' (+) 0
195
196 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
197 sumKahan :: Num a => [a] -> a
198 sumKahan = snd . L.foldl' go (0,0)
199 where
200 go (c,t) i = ((t'-t)-y,t')
201 where
202 y = i-c
203 t' = t+y
204
205 -- | compute part of the dict
206 count2map :: (Ord k, Foldable t) => t k -> Map k Double
207 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
208
209 -- | insert in a dict
210 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
211 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
212
213
214 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
215 trunc n = truncate . (* 10^n)
216
217 trunc' :: Int -> Double -> Double
218 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
219
220
221 ------------------------------------------------------------------------
222 bool2num :: Num a => Bool -> a
223 bool2num True = 1
224 bool2num False = 0
225
226 bool2double :: Bool -> Double
227 bool2double = bool2num
228
229 bool2int :: Bool -> Int
230 bool2int = bool2num
231 ------------------------------------------------------------------------
232
233 -- Normalizing && scaling data
234 scale :: [Double] -> [Double]
235 scale = scaleMinMax
236
237 scaleMinMax :: [Double] -> [Double]
238 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
239 where
240 ma = maximum xs'
241 mi = minimum xs'
242 xs' = map abs xs
243
244 scaleNormalize :: [Double] -> [Double]
245 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
246 where
247 v = variance xs'
248 m = mean xs'
249 xs' = map abs xs
250
251 normalize :: [Double] -> [Double]
252 normalize as = normalizeWith identity as
253
254 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
255 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
256 where
257 bs' = map extract bs
258
259 -- Zip functions to add
260 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
261 zipFst f xs = zip (f xs) xs
262
263 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
264 zipSnd f xs = zip xs (f xs)
265
266 -- | maximumWith
267 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
268 maximumWith f = L.maximumBy (compare `on` f)
269
270
271 -- | To get all combinations of a list with no repetition and apply a function to the resulting list of pairs
272 listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
273 listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l, y <- rest ]
274
275 ------------------------------------------------------------------------
276 -- Empty List Sugar Error Handling
277 -- TODO add Garg Monad Errors
278
279 listSafe1 :: Text -> ([a] -> Maybe a)
280 -> Text -> [a] -> a
281 listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
282 where
283 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
284
285 head' :: Text -> [a] -> a
286 head' = listSafe1 "head" headMay
287
288 last' :: Text -> [a] -> a
289 last' = listSafe1 "last" lastMay
290
291 ------------------------------------------------------------------------
292
293 listSafeN :: Text -> ([a] -> Maybe [a])
294 -> Text -> [a] -> [a]
295 listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
296 where
297 h = "[ERR][Gargantext] Empty list for " <> s <> " in "
298
299 tail' :: Text -> [a] -> [a]
300 tail' = listSafeN "tail" tailMay
301
302 init' :: Text -> [a] -> [a]
303 init' = listSafeN "init" initMay
304