]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Ngrams/TextMining.hs
[PHYLO] step -> period.
[gargantext.git] / src / Gargantext / Ngrams / TextMining.hs
1
2 module Gargantext.Ngrams.TextMining where
3
4 import Gargantext.Prelude
5 import Data.Ord(Ordering(LT,GT), compare)
6 import Data.Text (pack)
7 import Data.Bool (otherwise)
8 import Data.Map (empty, Map, insertWith, toList)
9 import Data.List (foldl, foldl')
10 import qualified Data.List as L
11
-- | Comparison function for pairs: first components are ordered
-- descending, and ties are broken by the second components in ascending
-- order (plain 'compare').
--
-- The last guard is 'otherwise' rather than an explicit equality test, so
-- the function is total. Values that are neither less than, greater than,
-- nor equal to each other (e.g. a floating-point NaN) fall back to the
-- second-component comparison instead of reaching a panic branch.
sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
sortGT (a1, b1) (a2, b2)
  | a1 < a2   = GT               -- smaller first component sorts later
  | a1 > a2   = LT               -- larger first component sorts earlier
  | otherwise = compare b1 b2    -- tie on the first component
18
19
20 --histogram :: Ord a => [a] -> [(a, Int)]
21 --histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
22 --histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
23
-- | Record one more occurrence of @e@ in the counter map @m@: a key that
-- is absent is inserted with a count of 1, a key that is present has 1
-- added to its existing count.
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = insertWith (+) e 1 m
26
-- | Build a frequency table: each distinct element of the input list is
-- mapped to the number of times it occurs.
--
-- Uses the strict left fold 'foldl'' so the accumulated map is forced at
-- every step, instead of the lazy 'foldl', which would first build a chain
-- of suspended 'countElem' applications as long as the input list.
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl' countElem Data.Map.empty
29
-- | Collect every key of the map whose value is the largest one present.
--
-- The entries are walked in the order produced by 'toList'; each key tied
-- with the current maximum is prepended to the result, so tied keys come
-- out in the reverse of that order. An empty map yields an empty list.
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m =
  case foldl' step ([], Nothing) (toList m) of
    (winners, _) -> winners
  where
    -- First entry seen: it is the maximum so far.
    step (acc, Nothing) (k, v) = (k : acc, Just v)
    step (acc, Just best) (k, v)
      | v < best  = (acc, Just best)    -- strictly smaller: ignore
      | v > best  = ([k], Just v)       -- new maximum: restart the list
      | otherwise = (k : acc, Just v)   -- tie: keep alongside the others
39
-- | Interleave two lists, taking elements alternately and starting with
-- the first list. When one list runs out, the remainder of the other is
-- appended unchanged.
merge :: [a] -> [a] -> [a]
merge left right =
  case left of
    []         -> right
    (l : rest) -> l : merge right rest   -- swap roles so the lists alternate
43
-- | Arithmetic mean of a list of 'Double's: the sum of the elements
-- divided by their count. For an empty list this is 0 divided by 0.
average :: [Double] -> Double
average x = total / count
  where
    total = L.sum x
    count = L.genericLength x
46
-- | Arithmetic mean of a list of 'Int's, returned as a 'Double'.
-- Every element is converted with 'fromIntegral' before summing, so the
-- summation itself is carried out on 'Double' values.
average' :: [Int] -> Double
average' x = total / count
  where
    asDoubles = [fromIntegral n | n <- x]
    total     = L.sum asDoubles
    count     = L.genericLength asDoubles
50
51
-- | Count how many times each year occurs in the list.
-- Written as a right fold: the tail of the list is counted first and the
-- head is then added to that result.
countYear :: [Integer] -> Map Integer Integer
countYear = L.foldr tally empty
  where
    tally year counted = insertWith (+) year 1 counted
55
-- | Count how many times each year occurs in the list, accumulating the
-- map from left to right with the strict fold 'foldl''.
countYear' :: [Integer] -> Map Integer Integer
countYear' = foldl' bump empty
  where
    bump counts year = insertWith (+) year 1 counts
58