2 module Data.Gargantext.Ngrams.Words where
3 import Data.List (partition)
4 import Data.Set (fromList, notMember, member)
5 import Data.Char (isPunctuation, toLower, isAlpha, isSpace)
7 import NLP.Stemmer (stem, Stemmer(..))
8 import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
9 import Language.Aspell.Options (ACOption(..))
11 --import Data.Either.Utils (fromRight)
12 import Data.ByteString.Internal (packChars)
-- Fragment: the head of the enclosing definition and the rest of its body are
-- not in view. `x` is packed into a ByteString and wrapped in the `Lang`
-- option; `spell_lang` is then bound to the result of
-- `spellCheckerWithOptions` called with that single option.
-- NOTE(review): `x` is presumably a language code such as "fr" -- confirm
-- against the caller.
16 let lang = Lang (packChars x)
17 spell_lang <- spellCheckerWithOptions [lang]
-- | 'String' front-end for Aspell's 'check': packs @x@ with 'packChars' and
-- passes it on. @lang@ is forwarded unchanged as the first argument of
-- 'check' (presumably the spell-checker handle -- confirm).
-- NOTE(review): 'packChars' keeps only the low 8 bits of each 'Char', so
-- characters above U+00FF are mangled and accented Latin-1 letters are sent
-- as single bytes -- confirm this matches the encoding Aspell expects.
20 check' lang x = check lang (packChars x)
-- | 'String' front-end for Aspell's 'suggest': packs @x@ with 'packChars' and
-- passes it on. @lang@ is forwarded unchanged as the first argument of
-- 'suggest' (presumably the spell-checker handle -- confirm).
-- NOTE(review): 'packChars' keeps only the low 8 bits of each 'Char', so
-- characters above U+00FF are mangled -- confirm the encoding Aspell expects.
21 suggest' lang x = suggest lang (packChars x)
23 --spell_lang <- spellChecker
27 -- stem French "naturelles"
34 -- Prelude.map (\x -> stem French x) $ cleanText "Les hirondelles s envolent dans les cieux."
45 --words $ filter (not . isPunctuation) $ Prelude.map toLower text
-- Fragment: the head of this definition is not in view (presumably
-- `cleanText`, which other lines of this file apply to a text -- confirm).
-- Read right to left: every character of `text` is lower-cased and passed
-- through `repl` (defined outside this view); only alphabetic and whitespace
-- characters are kept; the result is split into words on whitespace.
46 words $ filter (\x -> isAlpha x || isSpace x) $ Prelude.map (repl . toLower) text
-- Fragment: the head of the enclosing definition is not in view.
-- Builds a fixed two-entry set and tests whether `word` is a member of it.
49 let miamWord_set = fromList ["salut", "phrase"]
50 member word miamWord_set
-- Fragment: the head of the enclosing definition is not in view (presumably
-- `isStopWord`, which other lines of this file use as a predicate -- confirm).
-- Builds a fixed four-entry set and tests whether `word` is a member of it.
53 let stopWord_set = fromList ["de", "la", "une", "avec"]
54 member word stopWord_set
-- Fragment: the head of the enclosing definition is not in view.
-- Small demonstration on a hard-coded sample sentence; `cleanText` and
-- `isStopWord` are not defined in the visible lines.
57 let text = "Salut, ceci est une phrase \n\n avec de la ponctuation !"
-- Prints a pair: (words for which `isStopWord` is False, words for which it
-- is True).
58 print $ partition (not . isStopWord) $ cleanText text
-- Prints only the words for which `isStopWord` is False.
59 print $ filter (not . isStopWord) $ cleanText text
60 --print $ filter isStopWord $ words $ filter (not . isPunctuation) text