1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 module Hdoc.TCT.Read.Tree where
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..), void)
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Function (($), (.))
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.List.NonEmpty (NonEmpty(..))
16 import Data.Maybe (Maybe(..))
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.TreeSeq.Strict (Tree(..), Trees, tree0)
20 import qualified Data.Char as Char
21 import qualified Data.List as List
22 import qualified Data.Sequence as Seq
23 import qualified Data.Text.Lazy as TL
24 import qualified Text.Megaparsec as P
25 import qualified Text.Megaparsec.Char as P
30 import Hdoc.TCT.Read.Cell
31 import Hdoc.TCT.Read.Elem
32 import Hdoc.TCT.Read.Token
34 p_CellHeader :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
35 p_CellHeader row = debugParser "CellHeader" $ do
36 P.skipMany $ P.char ' '
37 Cell sp h <- p_Cell $ do
38 debugParser "Header" $
40 [ P.try $ P.char '-' >>
41 P.char ' ' $> HeaderDash <|>
42 P.string "- " $> HeaderDashDash
46 <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy $ \c -> c=='\n'||c==' '))
47 , P.try $ P.some (P.char '#') <* P.lookAhead (P.try $ P.char ' ') >>= \hs ->
48 return $ HeaderSection $ List.length hs
51 <$> P.between (P.string "[") (P.string "]:")
52 (P.takeWhile1P (Just "Reference") isReferenceChar)
53 -- <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy (=='\n')))
55 (\f -> HeaderDotSlash $ "./"<>f)
57 <*> P.many (P.satisfy (/='\n'))
62 [ P.try $ HeaderColon name wh
64 <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy $ \c -> c=='\n'||c==' '))
65 , P.char '>' $> HeaderGreat name wh
66 , P.char '=' $> HeaderEqual name wh
67 , P.char '|' $> HeaderBar name wh
70 let row' = Tree (Cell sp $ NodeHeader h) mempty : row
72 HeaderSection{} -> p_CellEnd row'
73 HeaderDash{} -> p_Row row'
74 HeaderDashDash{} -> p_CellRaw row'
75 HeaderDot{} -> p_Row row'
76 HeaderColon{} -> p_Row row'
77 HeaderBrackets{} -> p_Row row'
78 HeaderGreat{} -> p_Row row'
79 HeaderEqual{} -> p_CellRaw row'
80 HeaderBar{} -> p_CellRaw row'
81 HeaderDotSlash{} -> p_CellEnd row'
83 isReferenceChar :: Char -> Bool
90 p_Name :: P.Tokens s ~ TL.Text => Parser e s Name
93 (\h t -> Text.pack (h:t))
94 <$> (P.satisfy $ \c -> Char.isAlphaNum c || c=='_')
95 <*> P.takeWhile1P (P.satisfy $ \c -> Char.isAlphaNum c || c=='-' || c=='_')
98 p_Line :: P.Tokens s ~ TL.Text => Parser e s TL.Text
99 p_Line = P.takeWhileP (Just "Line") (/='\n')
101 p_Line1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
102 p_Line1 = P.takeWhile1P (Just "Line") (/='\n')
104 p_CellLower :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
105 p_CellLower row = debugParser "CellLower" $ do
107 Cell ssp@(Span fp bp ep:|sp) (name,attrs) <-
110 (,) <$> p_Name <*> p_ElemAttrs
112 Tree (Cell ssp $ NodeLower name attrs) .
113 Seq.singleton . tree0 . (NodeText <$>)
114 let treeElem hasContent nod (Cell (Span _fp _bp ep':|_sp) t) =
115 let (o,_) = bs $ PairElem name attrs in
116 tree0 $ Cell (Span fp bp ep':|sp) $ nod $ o<>t
118 bs | hasContent = pairBordersDouble
119 | otherwise = pairBordersSingle
121 P.try (P.char '>' >> treeElem True NodeText <$> p_CellLinesUntilElemEnd indent name) <|>
122 P.try (P.char '\n' >> P.tokens (==) indent >> treeHere <$> p_CellLines indent) <|>
123 P.try (P.tokens (==) "/>" >> treeElem False NodeText <$> p_CellLine) <|>
124 (P.eof $> treeHere (Cell (Span fp ep ep:|sp) ""))
127 p_CellLine :: P.Tokens s ~ TL.Text => Parser e s (Cell TL.Text)
128 p_CellLine = p_Cell p_Line
129 p_CellLines :: P.Tokens s ~ TL.Text => P.Tokens TL.Text -> Parser e s (Cell TL.Text)
131 -- TODO: optimize special case indent == "" ?
134 <$> P.sepBy (P.try p_Line)
135 (P.try $ P.char '\n' >> P.tokens (==) indent)
136 p_CellLinesUntilElemEnd :: P.Tokens s ~ TL.Text => P.Tokens TL.Text -> Name -> Parser e s (Cell TL.Text)
137 p_CellLinesUntilElemEnd indent name =
138 p_Cell $ TL.intercalate "\n" . List.reverse <$> go []
139 -- TODO: optimize merging, and maybe case indent == ""
141 go :: P.Tokens s ~ TL.Text => [TL.Text] -> Parser e s [TL.Text]
143 let end = "</" <> name in
144 P.try ((\w l -> w <> end <> l : ls)
148 (p_Line >>= \l -> P.try $
150 >> P.tokens (==) indent
153 p_CellText1 :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
154 p_CellText1 row = debugParser "CellText" $ do
155 P.skipMany $ P.char ' '
156 n <- p_Cell $ NodeText <$> p_Line1
157 return $ tree0 n : row
159 p_CellRaw :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
160 p_CellRaw row = debugParser "CellRaw" $ do
161 P.skipMany $ P.char ' '
162 n <- p_Cell $ NodeText <$> p_Line
163 return $ tree0 n : row
165 p_CellSpaces1 :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
166 p_CellSpaces1 row = debugParser "CellSpaces" $ do
167 P.skipSome $ P.char ' '
168 cell <- p_Cell $ NodeText <$> P.string ""
169 return $ tree0 cell : row
171 p_CellEnd :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
172 p_CellEnd row = debugParser "CellEnd" $
173 P.try (p_CellLower row) <|>
174 P.try (p_CellText1 row) <|>
175 p_CellSpaces1 row <|>
178 p_Row :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
179 p_Row row = debugParser "Row" $
180 P.try (p_CellHeader row) <|>
183 p_Rows :: P.Tokens s ~ TL.Text => Rows -> Parser e s Rows
186 let rows' = rows `mergeRow` row in
188 (P.newline >> {- USELESS: P.eof $> rows' <|>-} p_Rows rows')
190 p_Trees :: P.Tokens s ~ TL.Text => Parser e s (Trees (Cell Node))
191 p_Trees = collapseRows <$> p_Rows initRows