]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
[FEAT/STEM] implemenging Porter lib into Gargantext for English language.
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
15 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
16
17 {-# LANGUAGE NoImplicitPrelude #-}
18
19 module Gargantext.Prelude
20 ( module Gargantext.Prelude
21 , module Protolude
22 , headMay
23 , module Text.Show
24 , module Text.Read
25 , cs
26 , module Data.Maybe
27 )
28 where
29
30 import Data.Maybe (isJust, fromJust, maybe)
31 import Protolude ( Bool(True, False), Int, Double, Integer
32 , Fractional, Num, Maybe(Just,Nothing)
33 , Floating, Char, IO
34 , pure, (<$>), panic
35 , head, flip
36 , Ord, Integral, Foldable, RealFrac, Monad, filter
37 , reverse, map, zip, drop, take, zipWith
38 , sum, fromIntegral, length, fmap, foldl, foldl'
39 , takeWhile, sqrt, undefined, identity
40 , abs, min, max, maximum, minimum, return, snd, truncate
41 , (+), (*), (/), (-), (.), ($), (**), (^), (<), (>), log
42 , Eq, (==), (>=), (<=), (<>), (/=)
43 , (&&), (||), not
44 , fst, snd, toS
45 , elem, die, mod, div, const
46 , curry, uncurry
47 , otherwise
48 )
49
50 -- TODO import functions optimized in Utils.Count
51 -- import Protolude hiding (head, last, all, any, sum, product, length)
52 -- import Gargantext.Utils.Count
53 import qualified Data.List as L hiding (head, sum)
54 import qualified Control.Monad as M
55
56 import Data.Map (Map)
57 import qualified Data.Map as M
58
59 import Data.Map.Strict (insertWith)
60 import qualified Data.Vector as V
61 import Safe (headMay)
62 import Text.Show (Show(), show)
63 import Text.Read (Read())
64 import Data.String.Conversions (cs)
65
66 --pf :: (a -> Bool) -> [a] -> [a]
67 --pf = filter
68
69 pr :: [a] -> [a]
70 pr = reverse
71
72 --pm :: (a -> b) -> [a] -> [b]
73 --pm = map
74
75 map2 :: (t -> b) -> [[t]] -> [[b]]
76 map2 fun = map (map fun)
77
78 pz :: [a] -> [b] -> [(a, b)]
79 pz = zip
80
81 pd :: Int -> [a] -> [a]
82 pd = drop
83
84 ptk :: Int -> [a] -> [a]
85 ptk = take
86
87 pzw :: (a -> b -> c) -> [a] -> [b] -> [c]
88 pzw = zipWith
89
90 -- Exponential Average
91 eavg :: [Double] -> Double
92 eavg (x:xs) = a*x + (1-a)*(eavg xs)
93 where a = 0.70
94 eavg [] = 0
95
96 -- Simple Average
97 mean :: Fractional a => [a] -> a
98 mean xs = if L.null xs then 0.0
99 else sum xs / fromIntegral (length xs)
100
101 sumMaybe :: Num a => [Maybe a] -> Maybe a
102 sumMaybe = fmap sum . M.sequence
103
104 variance :: Floating a => [a] -> a
105 variance xs = mean $ map (\x -> (x - m) ** 2) xs where
106 m = mean xs
107
108 deviation :: [Double] -> Double
109 deviation = sqrt . variance
110
111 movingAverage :: Fractional b => Int -> [b] -> [b]
112 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
113
114 ma :: [Double] -> [Double]
115 ma = movingAverage 3
116
117
118 -- | Function to split a range into chunks
119 chunkAlong :: Int -> Int -> [a] -> [[a]]
120 chunkAlong a b l = only (while dropAlong)
121 where
122 only = map (take a)
123 while = takeWhile (\x -> length x >= a)
124 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
125
126 -- | Optimized version (Vector)
127 chunkAlong' :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
128 chunkAlong' a b l = only (while dropAlong)
129 where
130 only = V.map (V.take a)
131 while = V.takeWhile (\x -> V.length x >= a)
132 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
133
134 -- | TODO Inverse of chunk ? unchunkAlong ?
135 unchunkAlong :: Int -> Int -> [[a]] -> [a]
136 unchunkAlong = undefined
137
138
139 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
140 splitAlong :: [Int] -> [Char] -> [[Char]]
141 splitAlong _ [] = [] -- No list? done
142 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
143 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys) -- take until our split spot, recurse with next split spot and list remainder
144
145 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
146 takeWhileM _ [] = return []
147 takeWhileM p (a:as) = do
148 v <- a
149 if p v
150 then do
151 vs <- takeWhileM p as
152 return (v:vs)
153 else return []
154
155 -- SUMS
156 -- To select the right algorithme according to the type:
157 -- https://github.com/mikeizbicki/ifcxt
158
159 sumSimple :: Num a => [a] -> a
160 sumSimple = L.foldl' (+) 0
161
162 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
163 sumKahan :: Num a => [a] -> a
164 sumKahan = snd . L.foldl' go (0,0)
165 where
166 go (c,t) i = ((t'-t)-y,t')
167 where
168 y = i-c
169 t' = t+y
170
171 -- | compute part of the dict
172 count2map :: (Ord k, Foldable t) => t k -> Map k Double
173 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
174
175 -- | insert in a dict
176 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
177 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
178
179
180 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
181 trunc n = truncate . (* 10^n)
182
183 trunc' :: Int -> Double -> Double
184 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
185
186
187 bool2int :: Num a => Bool -> a
188 bool2int b = case b of
189 True -> 1
190 False -> 0
191
192 bool2double :: Bool -> Double
193 bool2double bool = case bool of
194 True -> 1.0
195 False -> 0.0
196
197
198
199 -- Normalizing && scaling data
200 scale :: [Double] -> [Double]
201 scale = scaleMinMax
202
203 scaleMinMax :: [Double] -> [Double]
204 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
205 where
206 ma = maximum xs'
207 mi = minimum xs'
208 xs' = map abs xs
209
210 scaleNormalize :: [Double] -> [Double]
211 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
212 where
213 v = variance xs'
214 m = mean xs'
215 xs' = map abs xs
216
217
218
219 normalize :: [Double] -> [Double]
220 normalize as = normalizeWith identity as
221
222 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
223 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
224 where
225 bs' = map extract bs
226
227 -- Zip functions to add
228 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
229 zipFst f xs = zip (f xs) xs
230
231 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
232 zipSnd f xs = zip xs (f xs)
233
234
235 -- Just
236 unMaybe :: [Maybe a] -> [a]
237 unMaybe = map fromJust . L.filter isJust
238
239 -- | Syntactic convention for the reader/writer coordination.
240 -- @Motivation@: explicit functional flux ease coordination between
241 -- readers and writers who are not always the same individuals. Each
242 -- natural languages has its own syntaxical conventions from left to
243 -- right or the contrary. In computer programming languages it depends
244 -- on context of the algorithm itself and we need some clarity since
245 -- both are possible, here is a proposition to get more explicitiness.
246
247 -- | (<|) is called : "Pipe rightLeft" as "from right to left". The most right
248 -- function sends its output to the most left function which takes it as
249 -- input.
250 (<|) :: (a -> b) -> a -> b
251 (<|) = ($)
252
253 -- | (|>) is called : "Pipe leftRight" as "from left to right". The most left
254 -- function sends its output to the most right function which takes it as
255 -- input. (|>) == (&) = True -- in base prelude
256 (|>) :: a -> (a -> c) -> c
257 (|>) = flip ($)
258
259 -- | Function composition orientation
260 (<.) :: (b -> c) -> (a -> b) -> a -> c
261 (<.) = (.)
262
263 -- | Function composition orientation
264 (.>) :: (a -> b) -> (b -> c) -> a -> c
265 (.>) = flip (.)
266
267