1 module Gargantext.RCT where
3 import Gargantext.Prelude
7 --import Data.Text (Text, words)
8 --import Data.Attoparsec.Text (anyChar, isEndOfLine, Parser, takeTill, many1, endOfLine, space, manyTill)
9 --import Control.Applicative (many)
11 -- RCT is the acronym for Referential ConText (of Text)
12 -- in the beginning there was a byte
16 -- then a list of chars called a string, we call it a Form
17 -- (removing all weird characters which are not alphanumeric)
19 -- Form -> RCT Sentence
21 -- These forms compose the RCT Sentence
22 -- an ngram is composed of multiple forms
24 -- Paragraph = [Sentence]
26 -- type Title = Paragraph
27 -- data Block = [Paragraph]
28 -- Block is taken from Pandoc
30 -- data Document = [Block]
36 -- Paragraph (abstract + title)
37 -- Sentence - Ngrams - Forms
41 --separateurs :: Parser Text
42 --separateurs = dropWhile isEndOfLine
44 --paragraphs :: Parser [Text]
45 --paragraphs = many paragraph
47 --paragraph :: Parser Text
48 --paragraph = takeTill isEndOfLine <* many1 endOfLine
50 -- forms :: Text -> [Text]