1 module Data.Gargantext.Ngrams.Words where
2 import Data.List (partition)
3 import Data.Set (fromList, notMember, member)
4 import Data.Char (isPunctuation, toLower, isAlpha, isSpace)
6 import NLP.Stemmer (stem, Stemmer(..))
7 import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
8 import Language.Aspell.Options (ACOption(..))
10 --import Data.Either.Utils (fromRight)
11 import Data.ByteString.Internal (packChars)
15 let lang = Lang (packChars x)
16 spell_lang <- spellCheckerWithOptions [lang]
-- | Wrapper around 'check' (imported from "Language.Aspell") that accepts the
-- word @x@ as a list of 'Char' and converts it with 'packChars' before the call.
-- The first argument is forwarded to 'check' unchanged.
-- NOTE(review): 'packChars' is documented as truncating each 'Char' to 8 bits,
-- so characters above U+00FF would be altered before the lookup -- confirm
-- this matches the encoding the dictionary expects.
19 check' lang x = check lang (packChars x)
-- | Wrapper around 'suggest' (imported from "Language.Aspell") that accepts the
-- word @x@ as a list of 'Char' and converts it with 'packChars' before the call.
-- The first argument is forwarded to 'suggest' unchanged.
-- NOTE(review): 'packChars' is documented as truncating each 'Char' to 8 bits,
-- so characters above U+00FF would be altered before the lookup -- confirm
-- this matches the encoding the dictionary expects.
20 suggest' lang x = suggest lang (packChars x)
22 --spell_lang <- spellChecker
26 -- stem French "naturelles"
33 -- Prelude.map (\x -> stem French x) $ cleanText "Les hirondelles s envolent dans les cieux."
44 --words $ filter (not . isPunctuation) $ Prelude.map toLower text
45 words $ filter (\x -> isAlpha x || isSpace x) $ Prelude.map (repl . toLower) text
48 let miamWord_set = fromList ["salut", "phrase"]
49 member word miamWord_set
52 let stopWord_set = fromList ["de", "la", "une", "avec"]
53 member word stopWord_set
56 let text = "Salut, ceci est une phrase \n\n avec de la ponctuation !"
57 print $ partition (not . isStopWord) $ cleanText text
58 print $ filter (not . isStopWord) $ cleanText text
59 --print $ filter isStopWord $ words $ filter (not . isPunctuation) text