]> Git — Sourcephile - gargantext.git/blob - src/Data/Gargantext/RCT.hs
[DRAFT] Parser main functions, for meeting.
[gargantext.git] / src / Data / Gargantext / RCT.hs
1 module Data.Gargantext.RCT where
2
3 foo :: Int -- placeholder binding; the only live definition in this draft module
4 foo = undefined -- NOTE(review): evaluating foo throws (Prelude.undefined); presumably a stub to be replaced by the parser functions — confirm
5 --import Data.Text (Text, words)
6 --import Data.Attoparsec.Text (anyChar, isEndOfLine, Parser, takeTill, many1, endOfLine, space, manyTill)
7 --import Control.Applicative (many)
8
9 -- RCT is the acronym for Referential ConText (of Text)
10 -- in the beginning there was a byte
11 -- then a char
12 -- Char -> RCT [Char]
13
14 -- then a list of chars called a string, we call it a Form
15 -- (removing all weird characters which are not alphanumeric)
16
17 -- Form -> RCT Sentence
18
19 -- These forms compose the RCT Sentence
20 -- an ngram is composed of multiple forms
21
22 -- Paragraph = [Sentence]
23
24 -- type Title = Paragraph
25 -- data Block = [Paragraph]
26 -- Block is taken from Pandoc
27
28 -- data Document = [Block]
29
30 -- Set of databases
31 -- Database
32 -- Set of Articles
33 -- Article
34 -- Paragraph (abstract + title)
35 -- Sentence - Ngrams - Forms
36
37
38
39 --separateurs :: Parser Text
40 --separateurs = dropWhile isEndOfLine
41
42 --paragraphs :: Parser [Text]
43 --paragraphs = many paragraph
44 --
45 --paragraph :: Parser Text
46 --paragraph = takeTill isEndOfLine <* many1 endOfLine
47 --
48 -- forms :: Text -> [Text]
49 -- forms = words
50
51
52