2 Module : Gargantext.Text.Context
3 Description : How to manage contexts of texts ?
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Context-of-text management tool; here is the logic of the main types:
18 How to split contexts is described in this module.
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE OverloadedStrings #-}
25 module Gargantext.Text.Context
28 import Data.Text (Text, pack, unpack)
29 import Data.String (IsString)
31 import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
32 import Gargantext.Text
33 import Gargantext.Prelude hiding (length)
35 ------------------------------------------------------------------------
-- | A list of 'Term's.
type MultiTerm = [Term]
-- | A 'MultiTerm' used as the first component of each 'TermList' entry.
type Label = MultiTerm
-- | Pairs of a 'Label' and a list of 'MultiTerm's.
-- NOTE(review): presumably the multi-terms grouped under that label — confirm
-- against the callers.
type TermList = [(Label, [MultiTerm])]
-- | A list of items of type @a@; it may also stand for a nominal group.
type Sentence a = [a] -- or a nominal group
-- | A list of sentences.
type Corpus a = [Sentence a] -- a list of sentences
45 -- type ConText a = [Sentence a]
46 -- type Corpus a = [ConText a]
47 ------------------------------------------------------------------------
-- | Context definitions used to build/unbuild contexts; consumed by 'splitBy'.
--
-- For 'Chars' and 'Sentences', the 'Int' @n@ makes 'splitBy' build its chunks
-- with @chunkAlong (n + 1) 1@. The 'Int' carried by 'Paragraphs' is ignored by
-- 'splitBy'.
data SplitContext = Chars Int | Sentences Int | Paragraphs Int
52 tag :: Text -> [Tag Text]
-- | Cut a 'Text' into contexts of characters, sentences or paragraphs,
-- as selected by the 'SplitContext'.
--
-- * @Chars n@ and @Sentences n@ both chunk their input (the characters of the
--   text, or the result of 'sentences') with @chunkAlong (n + 1) 1@; the
--   examples below show the resulting overlapping chunks for characters.
-- * @Paragraphs _@ ignores its 'Int': it keeps the elements of @tag txt@
--   that satisfy 'isTagText' and converts each of them with 'unTag'.
--
-- For examples at a higher level (sentences and paragraphs), see
-- 'Gargantext.Text.Examples.ex_terms'.
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy context = case context of
  Chars n      -> map pack . windows n . unpack
  Sentences n  -> map unsentences . windows n . sentences
  Paragraphs _ -> \doc -> [ unTag node | node <- tag doc, isTagText node ]
  where
    -- Shared chunking step: chunks of (k + 1) units, advancing by 1.
    windows k = chunkAlong (k + 1) 1
72 unTag :: IsString p => Tag p -> p