]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Context.hs
[FEAT] Adding External module for IMT community manager
[gargantext.git] / src / Gargantext / Text / Context.hs
1 {-|
2 Module : Gargantext.Text.Context
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Tools for splitting a text into contexts: sliding windows of characters or sentences, or the text nodes of an HTML document.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Context
18 where
19
20 import Data.Text (Text, pack, unpack)
21 import Data.String (IsString)
22
23 import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
24 import Gargantext.Text
25 import Gargantext.Prelude hiding (length)
26
27
-- | Granularity requested from 'splitBy'.
--
-- The 'Int' payload is the amount of extra context: 'splitBy' asks for
-- chunks of @n + 1@ units, advancing by one unit at a time.
data SplitContext
  = Chars Int
    -- ^ Chunks of @n + 1@ characters.
  | Sentences Int
    -- ^ Chunks of @n + 1@ sentences.
  | Paragraphs Int
    -- ^ Text nodes of the parsed markup.
    -- NOTE(review): 'splitBy' currently ignores this 'Int'.
29
-- | Parse a piece of 'Text' into its list of TagSoup 'Tag's.
--
-- Thin wrapper around 'parseTags', specialised to 'Text'.
tag :: Text -> [Tag Text]
tag markup = parseTags markup
32
-- | Split a 'Text' into contexts of characters, sentences or paragraphs.
--
-- For 'Chars' and 'Sentences' the input is cut with
-- @chunkAlong (n + 1) 1@, i.e. chunks of @n + 1@ units taken one unit
-- apart:
--
-- > splitBy (Chars 0) "abcde" == ["a","b","c","d","e"]
-- > splitBy (Chars 1) "abcde" == ["ab","bc","cd","de"]
-- > splitBy (Chars 2) "abcde" == ["abc","bcd","cde"]
--
-- For 'Paragraphs' the input is parsed with 'tag' and the content of
-- every text tag is returned in document order; the 'Int' argument is
-- ignored.
splitBy :: SplitContext -> Text -> [Text]
splitBy context input = case context of
  Chars n      -> map pack (chunkAlong (n + 1) 1 (unpack input))
  Sentences n  -> map unsentences (chunkAlong (n + 1) 1 (sentences input))
  Paragraphs _ -> [ tagContent node | node <- tag input, isTagText node ]
  where
    -- Extract the payload of a text tag. The fallback is only there for
    -- totality: callers above keep text tags only.
    tagContent :: IsString p => Tag p -> p
    tagContent (TagText body) = body
    tagContent _              = ""
48
49