]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Ngrams/TextMining.hs
[FEAT/STEM] implementing Porter lib into Gargantext for English language.
[gargantext.git] / src / Gargantext / Ngrams / TextMining.hs
1 {-|
2 Module : Gargantext.Ngrams.TextMining
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15
16 module Gargantext.Ngrams.TextMining where
17
18 import Gargantext.Prelude
19 import Data.Ord(Ordering(LT,GT), compare)
20 import Data.Text (pack)
21 import Data.Bool (otherwise)
22 import Data.Map (empty, Map, insertWith, toList)
23 import Data.List (foldl, foldl')
24 import qualified Data.List as L
25
26 sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
27 sortGT (a1, b1) (a2, b2)
28 | a1 < a2 = GT
29 | a1 > a2 = LT
30 | a1 == a2 = compare b1 b2
31 sortGT (_, _) (_, _) = panic (pack "What is this case ?")
32
33
34 --histogram :: Ord a => [a] -> [(a, Int)]
35 --histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
36 --histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
37
38 countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
39 countElem m e = Data.Map.insertWith (\n o -> n + o) e 1 m
40
41 freqList :: (Ord k) => [k] -> Data.Map.Map k Int
42 freqList = foldl countElem Data.Map.empty
43
44 getMaxFromMap :: Ord a => Map a1 a -> [a1]
45 getMaxFromMap m = go [] Nothing (toList m)
46 where
47 go ks _ [] = ks
48 go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
49 go ks (Just u) ((k,v):rest)
50 | v < u = go ks (Just u) rest
51 | v > u = go [k] (Just v) rest
52 | otherwise = go (k:ks) (Just v) rest
53
54 merge :: [a] -> [a] -> [a]
55 merge [] ys = ys
56 merge (x:xs) ys = x:merge ys xs
57
58 average :: [Double] -> Double
59 average x = L.sum x / L.genericLength x
60
61 average' :: [Int] -> Double
62 average' x = (L.sum y) / (L.genericLength y) where
63 y = L.map fromIntegral x
64
65
66 countYear :: [Integer] -> Map Integer Integer
67 countYear [] = empty
68 countYear (x:xs) = insertWith (+) x 1 (countYear xs)
69
70 countYear' :: [Integer] -> Map Integer Integer
71 countYear' (xs) = foldl' (\x y -> insertWith (+) y 1 x) empty xs
72