]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/RCT.hs
[PHYLO/FIX] time format.
[gargantext.git] / src / Gargantext / RCT.hs
1 {-|
2 Module : Gargantext.RCT
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE NoImplicitPrelude #-}
13
14 module Gargantext.RCT where
15
16 import Gargantext.Prelude
17
-- | Placeholder binding: 'foo' is declared as an 'Int' but defined as
-- 'undefined', so it carries no usable value.
-- NOTE(review): 'undefined' is presumably re-exported by "Gargantext.Prelude"
-- (the only import, with NoImplicitPrelude enabled) and presumably fails when
-- evaluated; 'foo' looks like a stub, so confirm it has no callers before
-- relying on it or removing it.
18 foo :: Int
19 foo = undefined
20 --import Data.Text (Text, words)
21 --import Data.Attoparsec.Text (anyChar, isEndOfLine, Parser, takeTill, many1, endOfLine, space, manyTill)
22 --import Control.Applicative (many)
23
24 -- RCT is the acronym for Referential ConText (of Text)
25 -- at the beginning there was a byte
26 -- then a char
27 -- Char -> RCT [Char]
28
29 -- then a list of chars called a string, we call it a Form
30 -- (removing all weird characters which are not alphanumeric)
31
32 -- Form -> RCT Sentence
33
34 -- These forms compose the RCT Sentence
35 -- an ngram is composed of multiple forms
36
37 -- Paragraph = [Sentence]
38
39 -- type Title = Paragraph
40 -- data Block = [Paragraph]
41 -- Block is taken from Pandoc
42
43 -- data Document = [Block]
44
45 -- Set of databases
46 -- Database
47 -- Set of Articles
48 -- Article
49 -- Paragraph (abstract + title)
50 -- Sentence - Ngrams - Forms
51
52
53
54 --separateurs :: Parser Text
55 --separateurs = dropWhile isEndOfLine
56
57 --paragraphs :: Parser [Text]
58 --paragraphs = many paragraph
59 --
60 --paragraph :: Parser Text
61 --paragraph = takeTill isEndOfLine <* many1 endOfLine
62 --
63 -- forms :: Text -> [Text]
64 -- forms = words
65
66