Git — Sourcephile - gargantext.git/blob - src/Gargantext/RCT.hs
[CLEAN] fix gitignore on cabal files in order to minimize merge/error risks.
[gargantext.git] / src / Gargantext / RCT.hs
1 module Gargantext.RCT where
2
3 import Gargantext.Prelude
4
-- | Placeholder binding that carries no real value yet.
--
-- NOTE(review): in the visible part of this module every other definition is
-- commented out, so this looks like a stub kept only so the module defines
-- something — confirm before removing or relying on it.
5 foo :: Int
-- 'undefined' is whatever "Gargantext.Prelude" brings into scope; presumably
-- the usual bottom value, so forcing 'foo' would fail at runtime — TODO confirm.
6 foo = undefined
7 --import Data.Text (Text, words)
8 --import Data.Attoparsec.Text (anyChar, isEndOfLine, Parser, takeTill, many1, endOfLine, space, manyTill)
9 --import Control.Applicative (many)
10
11 -- RCT is the acronym for Referential ConText (of Text)
12 -- at the beginning there was a byte
13 -- then a char
14 -- Char -> RCT [Char]
15
16 -- then a list of chars called a string, we call it a Form
17 -- (removing all weird characters which are not alphanumeric)
18
19 -- Form -> RCT Sentence
20
21 -- These forms compose the RCT Sentence
22 -- an ngram is composed of multiple forms
23
24 -- Paragraph = [Sentence]
25
26 -- type Title = Paragraph
27 -- data Block = [Paragraph]
28 -- Block is taken from Pandoc
29
30 -- data Document = [Block]
31
32 -- Set of databases
33 -- Database
34 -- Set of Articles
35 -- Article
36 -- Paragraph (abstract + title)
37 -- Sentence - Ngrams - Forms
38
39
40
41 --separateurs :: Parser Text
42 --separateurs = dropWhile isEndOfLine
43
44 --paragraphs :: Parser [Text]
45 --paragraphs = many paragraph
46 --
47 --paragraph :: Parser Text
48 --paragraph = takeTill isEndOfLine <* many1 endOfLine
49 --
50 -- forms :: Text -> [Text]
51 -- forms = words
52
53
54