5 Copyright : (c) CNRS, 2017-Present
6 License : AGPL + CECILL v3
7 Maintainer : team@gargantext.org
8 Stability : experimental
14 module Ngrams.NLP where
16 import Data.Text (Text)
18 import Gargantext.Prelude
19 import Gargantext.Core.Text.Terms.Multi
-- Example-based check of 'cleanTextForNLP': one fixed input sentence is run
-- through the function and compared against a hand-written expected string.
24 describe "Text that should be cleaned before sending it to NLP tools as micro-services." $ do
-- Input fixture. It mixes four kinds of tokens: a URL with an explicit
-- @http://@ scheme, a bare @www.@ host name, a hyphen-joined digit run
-- (@343242-2332@), and three standalone numbers (@232 231 33@).
25 let text = "This is a url http://cnrs.gargantext.org to be remove and another one www.gargantext.org and digits 343242-2332 to be remove and some to keep: 232 231 33." :: Text
-- Expected output. Compared with the input, both URLs and the hyphen-joined
-- digit run are gone, the standalone numbers are still present, and every
-- removal leaves a single space between the neighbouring words.
-- The phrase "to be remove" is fixture text present in both literals; keep
-- the two strings in sync if either is ever edited.
-- NOTE(review): this binding has no type annotation; presumably its type is
-- fixed by the comparison with the result of 'cleanTextForNLP' -- confirm.
26 let result = "This is a url to be remove and another one and digits to be remove and some to keep: 232 231 33."
-- The assertion: actual value on the left, expected value on the right.
27 it "NLP Clean Text before sending to micro services:" $ cleanTextForNLP text `shouldBe` result