2 Module : Gargantext.Text.Context
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Context of text management tool
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Text.Context
20 import Data.Text (Text, pack, unpack)
21 import Data.String (IsString)
23 import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
24 import Gargantext.Text
25 import Gargantext.Prelude hiding (length)
-- | Unit used by 'splitBy' to cut a text into contexts.
--
-- For 'Chars' and 'Sentences' the 'Int' is the number of /extra/ neighbouring
-- units kept in each context: a value of @n@ yields windows of @n + 1@ units.
data SplitContext
  = Chars Int
    -- ^ Sliding windows of characters.
  | Sentences Int
    -- ^ Sliding windows of sentences.
  | Paragraphs Int
    -- ^ One context per text node produced by 'tag'; 'splitBy' currently
    -- ignores the 'Int'.
30 tag :: Text -> [Tag Text]
-- | Split a text into contexts of characters, sentences or paragraphs.
--
-- For 'Chars' and 'Sentences' the result is every window of @n + 1@
-- consecutive units, advancing one unit at a time (@chunkAlong (n + 1) 1@):
--
-- > splitBy (Chars 0) "abcde" == ["a","b","c","d","e"]
-- > splitBy (Chars 1) "abcde" == ["ab","bc","cd","de"]
-- > splitBy (Chars 2) "abcde" == ["abc","bcd","cde"]
--
-- For 'Paragraphs' the text is passed through 'tag', only the elements
-- satisfying 'isTagText' are kept, and each one is unwrapped with 'unTag'.
-- The 'Int' carried by 'Paragraphs' is ignored.
splitBy :: SplitContext -> Text -> [Text]
splitBy context = case context of
  Chars n      -> charWindows n
  Sentences n  -> sentenceWindows n
  Paragraphs _ -> textNodes
  where
    -- Windows of @extra + 1@ characters, re-packed as 'Text'.
    charWindows :: Int -> Text -> [Text]
    charWindows extra input =
      map pack (chunkAlong (extra + 1) 1 (unpack input))

    -- Windows of @extra + 1@ sentences, each glued back with 'unsentences'.
    sentenceWindows :: Int -> Text -> [Text]
    sentenceWindows extra input =
      map unsentences (chunkAlong (extra + 1) 1 (sentences input))

    -- Contents of the text-only elements returned by 'tag'.
    textNodes :: Text -> [Text]
    textNodes input = map unTag (filter isTagText (tag input))
45 unTag :: IsString p => Tag p -> p