Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/Ngrams/TextMining.hs
[FIX] removing duplicated imports.
[gargantext.git] / src / Data / Gargantext / Ngrams / TextMining.hs
1 module Data.Gargantext.Ngrams.TextMining where
2
3 import Data.Map (empty, Map, insertWith, toList)
4 import Data.List (foldl, foldl')
5 import qualified Data.List as L
6
7
-- | Comparison function for pairs: descending on the first component,
-- ascending on the second component when the first components tie.
--
-- The final guard is @otherwise@ rather than @a1 == a2@ so the function is
-- total: values that are neither less than, greater than, nor equal to each
-- other (such as a @NaN@ 'Double') fall through to the comparison of the
-- second components instead of raising a pattern-match failure.
sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
sortGT (a1, b1) (a2, b2)
  | a1 < a2 = GT
  | a1 > a2 = LT
  | otherwise = compare b1 b2
12
13
14 --histogram :: Ord a => [a] -> [(a, Int)]
15 --histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
16 --histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
17
-- | Record one more occurrence of @key@ in the counter map: the stored count
-- is incremented by 1, or set to 1 when the key is not yet present.
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem counts key = Data.Map.insertWith (+) key 1 counts
20
-- | Build a frequency table mapping each distinct element of the list to the
-- number of times it occurs.
--
-- Uses the strict left fold 'foldl'' so the accumulated map is forced at
-- every step instead of piling up a chain of suspended 'countElem' calls
-- as long as the input list.
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl' countElem Data.Map.empty
23
-- | Keys of the map whose associated value is maximal.
--
-- The key/value pairs are walked in the order produced by 'toList'. Every key
-- tied for the maximum is returned; each tie is consed onto the front of the
-- result, so ties come out in the reverse of the traversal order. An empty
-- map yields @[]@.
getMaxFromMap :: Ord a => Map k a -> [k]
getMaxFromMap m = go [] Nothing (toList m)
  where
    -- go <keys collected so far> <best value seen so far> <remaining pairs>
    go ks _ [] = ks
    -- First pair: nothing to compare against yet, so it becomes the best.
    go ks Nothing ((k, v) : rest) = go (k : ks) (Just v) rest
    go ks (Just u) ((k, v) : rest)
      | v < u = go ks (Just u) rest       -- worse than the best: skip
      | v > u = go [k] (Just v) rest      -- new best: discard earlier keys
      | otherwise = go (k : ks) (Just v) rest -- tie: keep alongside the others
33
-- | Interleave two lists, taking elements alternately and starting with the
-- first list. Once the list whose turn it is runs out, the remainder of the
-- other list is appended unchanged.
merge :: [a] -> [a] -> [a]
merge left right = case left of
  [] -> right
  (l : ls) -> l : merge right ls
37
-- | Arithmetic mean of a list of doubles: the sum of the elements divided by
-- their number. For an empty list this is @0 / 0@, i.e. @NaN@.
average :: [Double] -> Double
average values = total / count
  where
    total = L.sum values
    count = L.genericLength values
40
-- | Arithmetic mean of a list of 'Int's, returned as a 'Double'. Each element
-- is converted with 'fromIntegral' before summing. For an empty list the
-- result is @0 / 0@, i.e. @NaN@.
average' :: [Int] -> Double
average' ints =
  let asDoubles = map fromIntegral ints
   in L.sum asDoubles / L.genericLength asDoubles
44
45
-- | Count how many times each year occurs in the list.
--
-- Implemented as a strict left fold: the map is updated as the list is
-- consumed, instead of recursing to the end of the list first and leaving one
-- pending 'insertWith' per element on the stack. Because the per-key update
-- is just @+ 1@, the resulting map does not depend on the traversal order.
countYear :: [Integer] -> Map Integer Integer
countYear = foldl' (\counts year -> insertWith (+) year 1 counts) empty
49
-- | Count how many times each year occurs in the list, using a strict left
-- fold over the input.
countYear' :: [Integer] -> Map Integer Integer
countYear' years = foldl' tally empty years
  where
    -- Add one occurrence of @year@ to the running counts.
    tally counts year = insertWith (+) year 1 counts
52
53
-- | Small demo entry point: prints the result of interleaving two
-- single-element string lists with 'merge'.
textMiningMain :: IO ()
textMiningMain = print (merge ["abc"] ["bcd"])