src/Gargantext/Text/Ngrams/Token.hs

   1 {-|
   2 Module      : Gargantext.Text.Ngrams.Token
   3 Description : Tokens and tokenizing a text
   4 Copyright   : (c) CNRS, 2017-Present
   5 License     : AGPL + CECILL v3
   6 Maintainer  : team@gargantext.org
   7 Stability   : experimental
   8 Portability : POSIX
   9
  10 In computer science, lexical analysis, lexing or tokenization is the
  11 process of converting a sequence of characters (such as in a computer
  12 program or web page) into a sequence of tokens (strings with an assigned
  13 and thus identified meaning).
  14 Source: https://en.wikipedia.org/wiki/Tokenize
  15
  16 -}
  17
  18 {-# LANGUAGE NoImplicitPrelude #-}
  19
  20 module Gargantext.Text.Ngrams.Token (tokenize)
  21   where
  22
  23 import Data.Text (Text)
  24 import qualified Gargantext.Text.Ngrams.Token.Text as En
  25
  26 type Token = Text
  27
  28 -- >>> tokenize "A rose is a rose is a rose."
  29 -- ["A","rose","is","a","rose","is","a","rose", "."]
  30 --
  31 tokenize :: Text -> [Token]
  32 tokenize = En.tokenize
  33