2 Module : Gargantext.Text.Context
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
Tools for managing text contexts: splitting a text into paragraphs, groups of sentences, or groups of characters.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Text.Context where
19 import Data.Text (Text, pack, unpack, length)
20 import Data.String (IsString)
22 import Text.HTML.TagSoup
23 import Gargantext.Text
24 import Gargantext.Prelude hiding (length)
-- | Strategy selecting how 'splitBy' cuts a text into pieces.
data SplitBy
  = Paragraph -- ^ One piece per text node of the parsed markup.
  | Sentences -- ^ Pieces made of groups of sentences.
  | Chars     -- ^ Pieces made of groups of characters.
-- | Cut a text into pieces according to the chosen 'SplitBy' strategy.
--
-- * 'Chars': the text is unpacked to characters, grouped with
--   @chunkAlong size size@, and each group is packed back into a 'Text'.
-- * 'Sentences': the text is split with 'sentences', the sentences are
--   grouped with @chunkAlong size size@, and each group is re-joined with
--   'unsentences'.
-- * 'Paragraph': the text is parsed with 'parseTags' and only the content
--   of the text tags is kept; the size argument is ignored.
--
-- NOTE(review): the exact grouping (in particular how a trailing group
-- shorter than @size@ is handled) is decided by 'chunkAlong', which is
-- defined outside this module -- confirm before relying on it.
splitBy :: SplitBy -> Int -> Text -> [Text]
splitBy strategy size = case strategy of
  Paragraph -> \input -> [removeTag tag | tag <- parseTags input, isTagText tag]
  Sentences -> \input -> map unsentences (chunkAlong size size (sentences input))
  Chars     -> \input -> map pack (chunkAlong size size (unpack input))
-- | Extract the string carried by a TagSoup 'Tag'.
--
-- 'TagText' and 'TagComment' yield the string they wrap. Any other
-- constructor is mapped to the empty string, which keeps the function total
-- instead of failing with a pattern-match error on, for example, an opening
-- or closing tag.
removeTag :: IsString p => Tag p -> p
removeTag (TagText    x) = x
removeTag (TagComment x) = x
-- The empty literal is typed through 'IsString' (OverloadedStrings).
removeTag _              = ""