]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Prelude.hs
Add the clustering up to level 2 and more
[gargantext.git] / src / Gargantext / Prelude.hs
1 {-|
2 Module : Gargantext.Prelude
3 Description : Specific Prelude of the project
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
15 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
16
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19
20 module Gargantext.Prelude
21 ( module Gargantext.Prelude
22 , module Protolude
23 , headMay, lastMay
24 , module GHC.Err.Located
25 , module Text.Show
26 , module Text.Read
27 , cs
28 , module Data.Maybe
29 , round
30 , sortWith
31 )
32 where
33
34 import GHC.Exts (sortWith)
35 import GHC.Err.Located (undefined)
36 import GHC.Real (round)
37 import Control.Monad.IO.Class (MonadIO)
38 import Data.Maybe (isJust, fromJust, maybe)
39 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
40 , Fractional, Num, Maybe(Just,Nothing)
41 , Enum, Bounded, Float
42 , Floating, Char, IO
43 , pure, (>>=), (=<<), (<*>), (<$>)
44 , putStrLn
45 , head, flip
46 , Ord, Integral, Foldable, RealFrac, Monad, filter
47 , reverse, map, mapM, zip, drop, take, zipWith
48 , sum, fromIntegral, length, fmap, foldl, foldl'
49 , takeWhile, sqrt, identity
50 , abs, min, max, maximum, minimum, return, snd, truncate
51 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
52 , Eq, (==), (>=), (<=), (<>), (/=)
53 , (&&), (||), not, any, all
54 , fst, snd, toS
55 , elem, die, mod, div, const, either
56 , curry, uncurry, repeat
57 , otherwise, when
58 , IO()
59 , compare
60 , on
61 , panic
62 )
63
64 -- TODO import functions optimized in Utils.Count
65 -- import Protolude hiding (head, last, all, any, sum, product, length)
66 -- import Gargantext.Utils.Count
67 import qualified Data.List as L hiding (head, sum)
68 import qualified Control.Monad as M
69
70 import Data.Map (Map)
71 import qualified Data.Map as M
72
73 import Data.Map.Strict (insertWith)
74 import qualified Data.Vector as V
75 import Safe (headMay, lastMay)
76 import Text.Show (Show(), show)
77 import Text.Read (Read())
78 import Data.String.Conversions (cs)
79
80
81 printDebug :: (Show a, MonadIO m) => [Char] -> a -> m ()
82 printDebug msg x = putStrLn $ msg <> " " <> show x
83 -- printDebug _ _ = pure ()
84
85
86 map2 :: (t -> b) -> [[t]] -> [[b]]
87 map2 fun = map (map fun)
88
89
90 -- Some Statistics sugar functions
91 -- Exponential Average
92 eavg :: [Double] -> Double
93 eavg (x:xs) = a*x + (1-a)*(eavg xs)
94 where a = 0.70
95 eavg [] = 0
96
97 -- Simple Average
98 mean :: Fractional a => [a] -> a
99 mean xs = if L.null xs then 0.0
100 else sum xs / fromIntegral (length xs)
101
102
103 sumMaybe :: Num a => [Maybe a] -> Maybe a
104 sumMaybe = fmap sum . M.sequence
105
106 variance :: Floating a => [a] -> a
107 variance xs = mean $ map (\x -> (x - m) ** 2) xs where
108 m = mean xs
109
110 deviation :: [Double] -> Double
111 deviation = sqrt . variance
112
113 movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
114 movingAverage steps xs = map mean $ chunkAlong steps 1 xs
115
116 ma :: [Double] -> [Double]
117 ma = movingAverage 3
118
119 -- | splitEvery n == chunkAlong n n
120 splitEvery :: Int -> [a] -> [[a]]
121 splitEvery _ [] = []
122 splitEvery n xs =
123 let (h,t) = L.splitAt n xs
124 in h : splitEvery n t
125
126 type Grain = Int
127 type Step = Int
128
129 -- | Function to split a range into chunks
130 -- if step == grain then linearity
131 -- elif step < grain then overlapping
132 -- else dotted with holes
133 chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
134 chunkAlong a b l = case a > 0 && b > 0 of
135 True -> chunkAlong_ a b l
136 False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"
137
138 chunkAlong_ :: Eq a => Int -> Int -> [a] -> [[a]]
139 chunkAlong_ a b l = filter (/= []) $ only (while dropAlong)
140 where
141 only = map (take a)
142 while = takeWhile (\x -> length x >= a)
143 dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
144
145 -- | Optimized version (Vector)
146 chunkAlong' :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
147 chunkAlong' a b l = only (while dropAlong)
148 where
149 only = V.map (V.take a)
150 while = V.takeWhile (\x -> V.length x >= a)
151 dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
152
153 -- | TODO Inverse of chunk ? unchunkAlong ?
154 -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
155 -- unchunkAlong = undefined
156
157
158 -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
159 splitAlong :: [Int] -> [Char] -> [[Char]]
160 splitAlong _ [] = [] -- No list? done
161 splitAlong [] xs = [xs] -- No place to split at? Return the remainder
162 splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys) -- take until our split spot, recurse with next split spot and list remainder
163
164 takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
165 takeWhileM _ [] = return []
166 takeWhileM p (a:as) = do
167 v <- a
168 if p v
169 then do
170 vs <- takeWhileM p as
171 return (v:vs)
172 else return []
173
174 -- SUMS
175 -- To select the right algorithme according to the type:
176 -- https://github.com/mikeizbicki/ifcxt
177
178 sumSimple :: Num a => [a] -> a
179 sumSimple = L.foldl' (+) 0
180
181 -- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
182 sumKahan :: Num a => [a] -> a
183 sumKahan = snd . L.foldl' go (0,0)
184 where
185 go (c,t) i = ((t'-t)-y,t')
186 where
187 y = i-c
188 t' = t+y
189
190 -- | compute part of the dict
191 count2map :: (Ord k, Foldable t) => t k -> Map k Double
192 count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
193
194 -- | insert in a dict
195 count2map' :: (Ord k, Foldable t) => t k -> Map k Double
196 count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
197
198
199 trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
200 trunc n = truncate . (* 10^n)
201
202 trunc' :: Int -> Double -> Double
203 trunc' n x = fromIntegral $ truncate $ (x * 10^n)
204
205
206 ------------------------------------------------------------------------
207 bool2num :: Num a => Bool -> a
208 bool2num True = 1
209 bool2num False = 0
210
211 bool2double :: Bool -> Double
212 bool2double = bool2num
213
214 bool2int :: Bool -> Int
215 bool2int = bool2num
216 ------------------------------------------------------------------------
217
218 -- Normalizing && scaling data
219 scale :: [Double] -> [Double]
220 scale = scaleMinMax
221
222 scaleMinMax :: [Double] -> [Double]
223 scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
224 where
225 ma = maximum xs'
226 mi = minimum xs'
227 xs' = map abs xs
228
229 scaleNormalize :: [Double] -> [Double]
230 scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
231 where
232 v = variance xs'
233 m = mean xs'
234 xs' = map abs xs
235
236 normalize :: [Double] -> [Double]
237 normalize as = normalizeWith identity as
238
239 normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
240 normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
241 where
242 bs' = map extract bs
243
244 -- Zip functions to add
245 zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
246 zipFst f xs = zip (f xs) xs
247
248 zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
249 zipSnd f xs = zip xs (f xs)
250
251 -- | maximumWith
252 maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
253 maximumWith f = L.maximumBy (compare `on` f)
254