]> Git — Sourcephile - doclang.git/blob - Language/TCT/Read/Tree.hs
Use Text.Lazy to speedup Token parsing.
[doclang.git] / Language / TCT / Read / Tree.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE Rank2Types #-}
4 {-# LANGUAGE TupleSections #-}
5 {-# LANGUAGE TypeFamilies #-}
6 module Language.TCT.Read.Tree where
7
8 import Control.Applicative (Applicative(..), Alternative(..))
9 import Control.Monad (Monad(..), void)
10 import Data.Bool
11 import Data.Eq (Eq(..))
12 import Data.Function (($), (.))
13 import Data.Functor ((<$>), ($>), (<$))
14 import Data.Monoid (Monoid(..))
15 import Data.Semigroup (Semigroup(..))
16 import Data.String (String)
17 import Data.Text (Text)
18 import Prelude (undefined, Int, Num(..), toInteger)
19 import qualified Data.Char as Char
20 import qualified Data.List as List
21 import qualified Data.Sequence as Seq
22 import qualified Data.Text as Text
23 import qualified Data.Text.Lazy as TL
24 import qualified Text.Megaparsec as P
25
26 import Language.TCT.Tree
27 import Language.TCT.Token
28 import Language.TCT.Read.Elem
29
30 p_Position :: Parser e s Pos
31 p_Position = (<$> P.getPosition) $ \p ->
32 Pos
33 (intOfPos $ P.sourceLine p)
34 (intOfPos $ P.sourceColumn p)
35 intOfPos :: P.Pos -> Int
36 intOfPos = fromInteger . toInteger . P.unPos
37
38 p_Line :: Parser e s Line
39 p_Line = intOfPos . P.sourceLine <$> P.getPosition
40
41 p_Column :: Parser e s Column
42 p_Column = intOfPos . P.sourceColumn <$> P.getPosition
43
44 p_CellKey :: Row -> Parser e s Row
45 p_CellKey row = pdbg "CellKey" $ do
46 P.skipMany $ P.char ' '
47 pos <- p_Position
48 key <- pdbg "Key" $
49 P.choice $
50 [ P.try $ P.string "- " $> KeyDash
51 , P.try $ KeyDot . Text.pack
52 <$> P.some (P.satisfy Char.isDigit)
53 <* P.char '.'
54 <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy $ \c -> c=='\n'||c==' '))
55 -- TODO: KeyComment
56 , P.try $ P.some (P.char '#') <* P.lookAhead (P.try $ P.char ' ') >>= \hs ->
57 return $ KeySection $ List.length hs
58 , do
59 name <- p_Name
60 wh <- Text.pack <$> P.many (P.char ' ')
61 P.choice
62 [ P.try $ KeyColon name wh
63 <$ P.char ':'
64 <* P.lookAhead (P.try $ P.eof <|> void (P.satisfy $ \c -> c=='\n'||c==' '))
65 , P.char '>' $> KeyGreat name wh
66 , P.char '=' $> KeyEqual name wh
67 , P.char '|' $> KeyBar name wh
68 -- TODO: KeyAt
69 ]
70 ]
71 posEnd <- p_Position
72 let row' = TreeN (Cell pos posEnd key) mempty : row
73 case key of
74 KeySection{} -> p_CellEnd row'
75 KeyDash{} -> p_Row row'
76 KeyDot{} -> p_Row row'
77 KeyColon{} -> p_Row row'
78 KeyGreat{} -> p_Row row'
79 KeyEqual{} -> p_CellEnd row'
80 KeyBar{} -> p_CellEnd row'
81 KeyLower{} -> undefined -- NOTE: handled in 'p_CellLower'
82
83 p_Name :: Parser e s Name
84 p_Name =
85 (\h t -> Text.pack (h:t))
86 <$> (P.satisfy $ \c ->
87 Char.isAlphaNum c || c=='_')
88 <*> many (P.satisfy $ \c ->
89 Char.isAlphaNum c || c=='-' || c=='_')
90
91 p_CellLower :: Row -> Parser e s Row
92 p_CellLower row = pdbg "CellLower" $ do
93 P.skipMany $ P.char ' '
94 pos <- p_Position
95 void $ P.char '<'
96 name <- p_Name
97 attrs <- p_attrs
98 posClose <- p_Position
99 let treeHere =
100 TreeN (Cell pos posClose $ KeyLower name attrs) .
101 Seq.singleton . Tree0
102 let treeElem toks (Cell _ p c) =
103 let (o,_) = pairBorders (PairElem name attrs) toks in
104 Tree0 $ Cell pos p (o<>c)
105 let indent = List.replicate (columnPos pos - 1) ' '
106 tree <-
107 P.try (P.char '>' >> treeElem (tokens [TokenPlain ""]) <$> p_CellLinesUntilElemEnd indent name) <|>
108 P.try (P.char '\n' >> P.string indent >> treeHere <$> p_CellLines indent) <|>
109 P.try (P.string "/>" >> treeElem mempty <$> p_CellLine) <|>
110 (P.eof $> treeHere (Cell posClose posClose ""))
111 return (tree:row)
112 where
113 p_attrs = P.many $ P.try $
114 (,)
115 <$> (Text.pack <$> P.some (P.char ' '))
116 <*> p_Attr
117 p_line :: Parser e s TL.Text
118 p_line = TL.pack <$> P.many (P.satisfy (/='\n'))
119 p_CellLine :: Parser e s (Cell TL.Text)
120 p_CellLine = do
121 pos <- p_Position
122 content <- p_line
123 posEnd <- p_Position
124 return $ Cell pos posEnd content
125 p_CellLines :: String -> Parser e s (Cell TL.Text)
126 p_CellLines indent = do
127 pos <- p_Position
128 content <-
129 TL.intercalate "\n"
130 <$> P.sepBy (P.try p_line) (P.try $ P.char '\n' >> P.string indent)
131 posEnd <- p_Position
132 return $ Cell pos posEnd content
133 p_CellLinesUntilElemEnd :: String -> Text -> Parser e s (Cell TL.Text)
134 p_CellLinesUntilElemEnd indent name = do
135 pos <- p_Position
136 content <- TL.intercalate "\n" . List.reverse <$> go []
137 posEnd <- p_Position
138 return $ Cell pos posEnd content
139 where
140 go :: [TL.Text] -> Parser e s [TL.Text]
141 go ls =
142 P.try ((\w p l -> TL.pack w <> TL.pack p <> l : ls)
143 <$> P.many (P.char ' ')
144 <*> P.string ("</"<>Text.unpack name)
145 <*> p_line) <|>
146 (p_line >>= \l -> P.try $
147 P.char '\n' >>
148 P.string indent >>
149 go (l:ls))
150
151 p_CellText :: Row -> Parser e s Row
152 p_CellText row = pdbg "CellText" $ do
153 P.skipMany $ P.char ' '
154 pos <- p_Position
155 line <- TL.pack <$> P.some (P.satisfy (/='\n'))
156 posEnd <- p_Position
157 return $ Tree0 (Cell pos posEnd line) : row
158
159 p_CellSpaces :: Row -> Parser e s Row
160 p_CellSpaces row = pdbg "CellSpaces" $ do
161 P.skipSome $ P.char ' '
162 pos <- p_Position
163 return $ Tree0 (Cell pos pos "") : row
164
165 p_CellEnd :: Row -> Parser e s Row
166 p_CellEnd row = pdbg "Row" $
167 P.try (p_CellLower row) <|>
168 P.try (p_CellText row) <|>
169 p_CellSpaces row <|>
170 return row
171
172 p_Row :: Row -> Parser e s Row
173 p_Row row = pdbg "Row" $
174 P.try (p_CellKey row) <|>
175 p_CellEnd row
176
177 p_Rows :: Rows -> Parser e s Rows
178 p_Rows rows =
179 p_Row [] >>= \row ->
180 let rows' = appendRow rows (List.reverse row) in
181 (P.eof $> rows') <|>
182 (P.newline >> p_Rows rows')
183
184 p_Trees :: Parser e s (Trees (Cell Key) (Cell TL.Text))
185 p_Trees = unRoot . collapseRows <$> p_Rows [root]
186 where
187 root = TreeN (Cell pos0 pos0 KeyDash) mempty
188 unRoot (TreeN (unCell -> KeyDash) roots) = roots
189 unRoot _ = undefined