{-# LANGUAGE NoImplicitPrelude #-}
-module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence)
+module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence, words)
where
import Prelude (String)
-import Data.Char (isSpace)
-import Data.Text (Text, toLower, split, splitOn, pack)
import Data.Text (Text)
import qualified Data.Text as T
-- | TODO remove Num ?
--isGram c = isAlphaNum c
+words :: Text -> [Text]
+words = monoTexts
-- | Sentence split separators
isSep :: Char -> Bool
-isSep = (`elem` (",.:;?!(){}[]\"" :: String))
-
+isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))
monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) $ monoTexts txt
monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence
+-- | TODO use text2term only
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
-
monoTextsBySentence :: Text -> [[Text]]
-monoTextsBySentence = map (T.split isSpace)
+monoTextsBySentence = map T.words
. T.split isSep
. T.toLower
-
-
-
-