2 Module : Gargantext.Core.Text.Context
3 Description : How to manage contexts of texts ?
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
Context of text management tool; here is the logic of the main types:
How to split contexts is described in this module.
23 module Gargantext.Core.Text.Context
26 import Data.Text (Text, pack, unpack)
27 import Data.String (IsString)
29 import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
30 import Gargantext.Core.Text
31 import Gargantext.Prelude hiding (length)
33 ------------------------------------------------------------------------
-- | A multi-term is a list of 'Term's.
type MultiTerm  = [Term]

-- | A label is itself a 'MultiTerm'.
type Label      = MultiTerm

-- | Association list pairing each 'Label' with a list of 'MultiTerm's.
type TermList   = [(Label, [MultiTerm])]

-- | A sentence (or a nominal group) is a list of items.
type Sentence a = [a]

-- | A corpus is a list of sentences.
type Corpus a   = [Sentence a]

-- Alternative layering, kept for reference (not enabled):
-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
45 ------------------------------------------------------------------------
-- | Granularity used by 'splitBy' to build/unbuild contexts.
--
-- The 'Int' is a context size: for 'Chars' and 'Sentences', 'splitBy'
-- produces windows of @n + 1@ items.
data SplitContext
  = Chars Int      -- ^ windows over the characters of the text
  | Sentences Int  -- ^ windows over the sentences of the text
  | Paragraphs Int -- ^ text nodes of the parsed markup ('splitBy' ignores the 'Int')
-- | Cut a 'Text' into contexts according to a 'SplitContext'.
--
-- * @Chars n@: windows of @n + 1@ consecutive characters, advancing one
--   character at a time (see the examples below).
-- * @Sentences n@: the same windowing applied to the result of 'sentences',
--   each window being joined back with 'unsentences'.
-- * @Paragraphs _@: the input is parsed with 'parseTags' and only the text
--   tags are kept; the 'Int' is not used.
--
-- For examples at a higher level (sentences and paragraphs), see
-- 'Gargantext.Core.Text.Examples.ex_terms'.
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy context txt = case context of
  Chars n      -> map pack        (windows n (unpack txt))
  Sentences n  -> map unsentences (windows n (sentences txt))
  Paragraphs _ -> [unTag tag | tag <- parseTags txt, isTagText tag]
  where
    -- Windows of (n + 1) items, moving forward one item at a time.
    windows n = chunkAlong (n + 1) 1
67 unTag :: IsString p => Tag p -> p