Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Context.hs
[FEAT] Cooc -> Matrix conversions tools.
[gargantext.git] / src / Gargantext / Text / Context.hs
1 {-|
2 Module : Gargantext.Text.Context
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Tools for splitting a text into contexts: chunks of characters, chunks of sentences, or the text nodes of an HTML document.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Context where
18
19 import Data.Text (Text, pack, unpack, length)
20 import Data.String (IsString)
21
22 import Text.HTML.TagSoup
23 import Gargantext.Text
24 import Gargantext.Prelude hiding (length)
25
-- | Strategy used by 'splitBy' to cut a text into contexts.
data SplitBy
  = Paragraph -- ^ One context per text node of the parsed markup; the size argument of 'splitBy' is ignored.
  | Sentences -- ^ Contexts are groups of sentences, sized by the 'Int' given to 'splitBy'.
  | Chars     -- ^ Contexts are groups of characters, sized by the 'Int' given to 'splitBy'.
27
28
-- | Split a text into a list of contexts according to the chosen strategy.
--
-- * 'Chars': the text is unpacked to characters, grouped with
--   @chunkAlong n n@, and each group is packed back into a 'Text'.
-- * 'Sentences': the text is cut with @sentences@, the sentences are grouped
--   with @chunkAlong n n@, and each group is glued back with @unsentences@.
-- * 'Paragraph': the text is run through @parseTags@ and the content of every
--   text node is returned, in the order @parseTags@ produced them. The 'Int'
--   argument is ignored.
--
-- NOTE(review): @chunkAlong@, @sentences@ and @unsentences@ are defined
-- outside this module; how @chunkAlong@ treats a trailing incomplete group or
-- @n <= 0@ should be confirmed there.
splitBy :: SplitBy -> Int -> Text -> [Text]
splitBy Chars     n txt = map pack        (chunkAlong n n (unpack txt))
splitBy Sentences n txt = map unsentences (chunkAlong n n (sentences txt))
-- Matching on the 'TagText' constructor both selects the text nodes and
-- extracts their content, so no fallback value is needed for other tags.
splitBy Paragraph _ txt = [ content | TagText content <- parseTags txt ]
38
39