1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE ScopedTypeVariables #-}
5 {-# LANGUAGE TypeFamilies #-}
6 module Hdoc.TCT.Read.Tree where
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..), void)
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Function (($), (.))
14 import Data.Functor ((<$>), ($>), (<$))
15 import Data.List.NonEmpty (NonEmpty(..))
16 import Data.Maybe (Maybe(..), maybe)
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.TreeSeq.Strict (Tree(..), Trees, tree0)
20 import qualified Data.Char as Char
21 import qualified Data.List as List
22 import qualified Data.Sequence as Seq
23 import qualified Data.Text.Lazy as TL
24 import qualified Language.Symantic.XML as XML
25 import qualified Text.Megaparsec as P
26 import qualified Text.Megaparsec.Char as P
32 import Hdoc.TCT.Read.Cell
33 import Hdoc.TCT.Read.Elem
34 import Hdoc.TCT.Read.Token
36 p_CellHeader :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
37 p_CellHeader row = debugParser "CellHeader" $ do
38 P.skipMany $ P.char ' '
39 Sourced sp h <- p_Cell $ do
40 debugParser "Header" $
42 [ P.try $ P.char '-' >>
43 P.char ' ' $> HeaderDash <|>
44 P.string "- " $> HeaderDashDash
48 <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy $ \c -> c=='\n'||c==' '))
49 , P.try $ P.some (P.char '#') <* P.lookAhead (P.try $ P.char ' ') >>= \hs ->
50 return $ HeaderSection $ List.length hs
53 <$> P.between (P.char '[') (P.string "]:")
54 (P.takeWhile1P (Just "Reference") isReferenceChar)
55 -- <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy (=='\n')))
57 (\f -> HeaderDotSlash $ "./"<>f)
59 <*> P.many (P.satisfy (/='\n'))
61 name <- P.optional p_ElemName
64 [ P.try $ HeaderColon name wh
66 <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy $ \c -> c=='\n'||c==' '))
67 , P.char '>' $> HeaderGreat name wh
68 , maybe empty (\n -> P.char '=' $> HeaderEqual n wh) name
69 , P.char '|' $> HeaderBar name wh
72 let row' = Tree (Sourced sp $ NodeHeader h) mempty : row
74 HeaderSection{} -> p_CellEnd row'
75 HeaderDash{} -> p_Row row'
76 HeaderDashDash{} -> p_CellRaw row'
77 HeaderDot{} -> p_Row row'
78 HeaderColon{} -> p_Row row'
79 HeaderBrackets{} -> p_Row row'
80 HeaderGreat{} -> p_Row row'
81 HeaderEqual{} -> p_CellRaw row'
82 HeaderBar{} -> p_CellRaw row'
83 HeaderDotSlash{} -> p_CellEnd row'
85 isReferenceChar :: Char -> Bool
92 p_Name :: P.Tokens s ~ TL.Text => Parser e s ElemName
93 p_Name = XML.NCName <$> p_AlphaNums
95 (\h t -> Text.pack (h:t))
96 <$> (P.satisfy $ \c -> Char.isAlphaNum c || c=='_')
97 <*> P.takeWhile1P (P.satisfy $ \c -> Char.isAlphaNum c || c=='-' || c=='_')
100 p_Line :: P.Tokens s ~ TL.Text => Parser e s TL.Text
101 p_Line = P.takeWhileP (Just "Line") (/='\n')
103 p_Line1 :: P.Tokens s ~ TL.Text => Parser e s TL.Text
104 p_Line1 = P.takeWhile1P (Just "Line") (/='\n')
106 p_CellLower :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
107 p_CellLower row = debugParser "CellLower" $ do
109 Sourced ssp@(FileRange fp bp ep:|sp) (name, attrs) <-
112 (,) <$> p_ElemName <*> p_ElemAttrs
114 Tree (Sourced ssp $ NodeLower name attrs) .
115 Seq.singleton . tree0 . (NodeText <$>)
116 let treeElem hasContent nod (Sourced (FileRange _fp _bp ep':|_sp) t) =
117 let (o,_) = bs $ PairElem name attrs in
118 tree0 $ Sourced (FileRange fp bp ep':|sp) $ nod $ o<>t
120 bs | hasContent = pairBordersDouble
121 | otherwise = pairBordersSingle
123 P.try (P.char '>' >> treeElem True NodeText <$> p_CellLinesUntilElemEnd indent name) <|>
124 P.try (P.char '\n' >> P.tokens (==) indent >> treeHere <$> p_CellLines indent) <|>
125 P.try (P.tokens (==) "/>" >> treeElem False NodeText <$> p_CellLine) <|>
126 (P.eof $> treeHere (Sourced (FileRange fp ep ep:|sp) ""))
129 p_CellLine :: P.Tokens s ~ TL.Text => Parser e s (Cell TL.Text)
130 p_CellLine = p_Cell p_Line
131 p_CellLines :: P.Tokens s ~ TL.Text => P.Tokens TL.Text -> Parser e s (Cell TL.Text)
133 -- TODO: optimize special case indent == "" ?
136 <$> P.sepBy (P.try p_Line)
137 (P.try $ P.char '\n' >> P.tokens (==) indent)
138 p_CellLinesUntilElemEnd :: P.Tokens s ~ TL.Text => P.Tokens TL.Text -> ElemName -> Parser e s (Cell TL.Text)
139 p_CellLinesUntilElemEnd indent (XML.NCName name) =
140 p_Cell $ TL.intercalate "\n" . List.reverse <$> go []
141 -- TODO: optimize merging, and maybe case indent == ""
143 go :: P.Tokens s ~ TL.Text => [TL.Text] -> Parser e s [TL.Text]
145 let end = "</" <> name in
146 P.try ((\w l -> w <> end <> l : ls)
150 (p_Line >>= \l -> P.try $
152 >> P.tokens (==) indent
155 p_CellText1 :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
156 p_CellText1 row = debugParser "CellText" $ do
157 P.skipMany $ P.char ' '
158 n <- p_Cell $ NodeText <$> p_Line1
159 return $ tree0 n : row
161 p_CellRaw :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
162 p_CellRaw row = debugParser "CellRaw" $ do
163 P.skipMany $ P.char ' '
164 n <- p_Cell $ NodeText <$> p_Line
165 return $ tree0 n : row
167 p_CellSpaces1 :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
168 p_CellSpaces1 row = debugParser "CellSpaces" $ do
169 P.skipSome $ P.char ' '
170 cell <- p_Cell $ NodeText <$> P.string ""
171 return $ tree0 cell : row
173 p_CellEnd :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
174 p_CellEnd row = debugParser "CellEnd" $
175 P.try (p_CellLower row) <|>
176 P.try (p_CellText1 row) <|>
177 p_CellSpaces1 row <|>
180 p_Row :: P.Tokens s ~ TL.Text => Row -> Parser e s Row
181 p_Row row = debugParser "Row" $
182 P.try (p_CellHeader row) <|>
185 p_Rows :: P.Tokens s ~ TL.Text => Rows -> Parser e s Rows
188 let rows' = rows `mergeRow` row in
190 (P.newline >> {- USELESS: P.eof $> rows' <|>-} p_Rows rows')
192 p_Trees :: P.Tokens s ~ TL.Text => Parser e s (Trees (Cell Node))
193 p_Trees = collapseRows <$> p_Rows initRows