1 {-# LANGUAGE OverloadedStrings #-}
3 module Gargantext.Ngrams.Count where
import Gargantext.Prelude
import Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
--import qualified Data.Text.Lazy.IO as DTLIO
import qualified Data.Text.Lazy as DTL
-- | /O(n)/ Split a lazy 'DTL.Text' into a list of one-character texts,
-- preserving the original order. An empty input yields an empty list.
--
-- Several variants of this function are defined in this module; the original
-- author's note lists them "from slower to faster".
letters :: DTL.Text -> [DTL.Text]
letters = DTL.chunksOf 1
-- | /O(n)/ Split a lazy 'DTL.Text' into a list of one-character texts,
-- preserving the original order. An empty input yields an empty list.
--
-- This deliberately does not rely on inserting a separator character and
-- splitting on it: any sentinel (such as @\'#\'@) may also occur in the input,
-- which would drop that character and produce spurious empty pieces, and
-- splitting an empty text on a separator yields @[\"\"]@ instead of @[]@.
letters' :: DTL.Text -> [DTL.Text]
letters' = map DTL.singleton . DTL.unpack
-- | Split a lazy 'DTL.Text' into a list of one-character texts using a right
-- fold over its characters: each character is wrapped with 'DTL.singleton'
-- and consed onto the pieces built from the rest of the text.
letters'' :: DTL.Text -> [DTL.Text]
letters'' text = DTL.foldr prepend [] text
  where
    -- Wrap one character as a text and put it in front of the later pieces.
    prepend c rest = DTL.singleton c : rest
30 -- words between punctuation
31 -- number of punctuation
-- | Count how many times each distinct element occurs in a list.
--
-- The result maps every element that appears at least once to its number of
-- occurrences; an empty list gives an empty map.
--
-- The list is consumed with a strict left fold and the counters are updated
-- through "Data.Map.Strict", so each count is evaluated as it is inserted
-- instead of accumulating a chain of @(+)@ thunks per key. (The formerly used
-- @Data.Map.insertWith'@ is no longer provided by current @containers@
-- releases; 'MS.insertWith' is its replacement.)
occurrences :: Ord a => [a] -> Map a Int
occurrences = foldl' countOne MS.empty
  where
    -- Add one to the counter of @x@, starting at 1 on first sight.
    countOne counts x = MS.insertWith (+) x 1 counts
37 --occurrences' :: Ord a => [a] -> Map a Integer
38 --occurrences' xs = DTL.foldl (\x y -> M.insertWith' (+) y 1 x) M.empty xs
42 -- (fichier:_) <- getArgs
43 -- c <- DTLIO.readFile fichier
44 -- --print $ occurrences $ DTL.chunksOf 1 c
45 -- pure $ occurrences $ letters'' c
46 -- --print $ occurrences $ DTL.words $ DTL.toLower c