2 Module : Gargantext.Text.Context
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
Tools for splitting a text into contexts: windows of characters, windows of sentences, or paragraphs.
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Text.Context
20 import Data.Text (Text, pack, unpack, length)
21 import Data.String (IsString)
23 import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
24 import Gargantext.Text
25 import Gargantext.Prelude hiding (length)
-- | Unit used by 'splitBy' to cut a text into contexts.
data SplitContext
  = Chars Int       -- ^ character windows; 'splitBy' windows with @n + 1@
  | Sentences Int   -- ^ sentence windows; 'splitBy' windows with @n + 1@
  | Paragraphs Int  -- ^ paragraph mode; 'splitBy' currently ignores the 'Int'
-- | Cut a text into contexts, the unit being selected by the 'SplitContext'.
--
-- * @Chars n@ windows the characters with @chunkAlong (n + 1) 1@ (see the
--   examples below) and packs every window back into a 'Text'.
-- * @Sentences n@ applies the same windowing to the result of @sentences@
--   and rejoins every window with @unsentences@.
-- * @Paragraphs _@ ignores its 'Int': the text goes through @tag@ and only
--   the elements accepted by 'isTagText' are kept, converted by @unTag@.
--
-- >> splitBy (Chars 0) "abcde"
-- ["a","b","c","d","e"]
-- >> splitBy (Chars 1) "abcde"
-- ["ab","bc","cd","de"]
-- >> splitBy (Chars 2) "abcde"
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy context txt = case context of
  Chars n ->
    [ pack window | window <- chunkAlong (n + 1) 1 (unpack txt) ]
  Sentences n ->
    [ unsentences window | window <- chunkAlong (n + 1) 1 (sentences txt) ]
  Paragraphs _ ->
    [ unTag node | node <- tag txt, isTagText node ]
43 unTag :: IsString p => Tag p -> p