]> Git — Sourcephile - doclang.git/blob - Language/TCT/Tree.hs
Fix parsing HeaderSection.
[doclang.git] / Language / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ordering(..), Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence ((|>))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Prelude (undefined, Num(..))
23 import System.FilePath (FilePath)
24 import Text.Show (Show(..))
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
28
29 import Language.TCT.Utils
30 import Language.TCT.Cell
31 import Language.TCT.Elem
32 import Language.TCT.Debug
33
34 -- * Type 'Root'
35 -- | A single 'Tree' to rule all the 'Node's
36 -- simplifies greatly the navigation and transformations,
37 -- especially because the later XML or DTC output
38 -- are themselves a single tree-like data structure.
39 --
40 -- Also, having a single 'Tree' is easier to merge
41 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
42 -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
43 --
44 -- For error reporting, each 'Node' is annotated with a 'Cell'
45 -- spanning over all its content (sub-'Trees' included).
46 type Root = Tree (Cell Node)
47 type Roots = Trees (Cell Node)
48
49 pattern Tree0 :: a -> Tree a
50 pattern Tree0 a <- Tree a (null -> True)
51 where Tree0 a = Tree a mempty
52
53 -- * Type 'Node'
54 data Node
55 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
56 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
57 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
58 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
59 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
60 | NodePara -- ^ node, gather trees by paragraph,
61 -- useful to know when to generate a <para> XML node
62 | NodeGroup -- ^ node, group trees into a single tree,
63 -- useful to return many trees when only one is expected
64 deriving (Eq,Show)
65 instance Pretty Node
66
67 -- * Type 'Header'
68 data Header
69 = HeaderColon !Name !White -- ^ @name: @
70 | HeaderEqual !Name !White -- ^ @name=@
71 | HeaderBar !Name !White -- ^ @name|@
72 | HeaderGreat !Name !White -- ^ @name>@
73 | HeaderBrackets !Name -- ^ @[name]@
74 | HeaderDot !Name -- ^ @1. @
75 | HeaderDash -- ^ @- @
76 | HeaderDashDash -- ^ @-- @
77 | HeaderSection !LevelSection -- ^ @# @
78 | HeaderDotSlash !FilePath -- ^ @./file @
79 deriving (Eq, Ord, Show)
80 instance Pretty Header
81
82 -- ** Type 'Name'
83 type Name = TL.Text
84
85 -- ** Type 'LevelSection'
86 type LevelSection = Int
87
88 -- * Type 'Pair'
89 data Pair
90 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
91 | PairHash -- ^ @#value#@
92 | PairStar -- ^ @*value*@
93 | PairSlash -- ^ @/value/@
94 | PairUnderscore -- ^ @_value_@
95 | PairDash -- ^ @-value-@
96 | PairBackquote -- ^ @`value`@
97 | PairSinglequote -- ^ @'value'@
98 | PairDoublequote -- ^ @"value"@
99 | PairFrenchquote -- ^ @«value»@
100 | PairParen -- ^ @(value)@
101 | PairBrace -- ^ @{value}@
102 | PairBracket -- ^ @[value]@
103 deriving (Eq,Ord,Show)
104 instance Pretty Pair
105
106 -- * Type 'Token'
107 data Token
108 = TokenText !TL.Text
109 | TokenEscape !Char
110 | TokenLink !Link
111 | TokenTag !Tag
112 deriving (Eq,Show)
113
114 -- ** Type 'Tag'
115 type Tag = TL.Text
116
117 -- ** Type 'Link'
118 type Link = TL.Text
119
120 -- * Type 'Row'
121 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
122 type Row = [Root]
123
124 -- ** Type 'Rows'
125 -- | In reverse order: a list of nodes in scope
126 -- (hence to which the next line can append to).
127 type Rows = [Root]
128
129 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
130 -- one can always put the last 'Root' as a child to a previous one.
131 -- This 'Root' just has to be discarded by 'collapseRows'.
132 initRows :: Rows
133 initRows = [Tree0 (Cell p p NodeGroup)]
134 where p = pos1{pos_line= -1, pos_column=0}
135 -- NOTE: such that any following 'Root'
136 -- is 'NodePara' if possible, and always a child.
137
138 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
139 --
140 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
141 -- they MAY span over multilines, and they can be many from a single line.
142 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
143 -- they MUST span only over a single and entire line.
144 --
145 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
146 mergeRow :: Rows -> Row -> Rows
147 mergeRow rows row =
148 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
149 mergeRowPrefix 0 rows $ List.reverse row
150
151 -- | Merge by considering matching prefixes.
152 --
153 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
154 -- but on their vertical alignment as prefixes.
155 -- Hence, each new 'Row' has those prefixes zipped into a single one
156 -- when they match, are aligned and adjacent.
157 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
158 mergeRowPrefix col rows row =
159 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
160 case (row,rows) of
161 ([], _) -> rows
162 (_, []) -> undefined -- NOTE: cannot happen with initRows
163 ( _new@(Tree (Cell bn _en n) _ns):news
164 , _old@(Tree (Cell _bo eo _o) _os):_olds ) ->
165 case collapseRowsWhile isCollapsable rows of
166 [] -> mergeRowIndent rows row
167 head@(unTree -> ch@(Cell bh _eh h)) : olds' ->
168 case (n,h) of
169 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
170 -- then check if there is a matching HeaderGreat,
171 -- if so, discard new and restart with a col advanced to new's beginning
172 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
173 | isAdjacent && isMatching ch -> discard
174 {-
175 | pos_column bn == pos_column bh
176 , isAdjacent
177 , hn == hh
178 -}
179 -- NOTE: same for HeaderBar
180 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
181 | isAdjacent && isMatching ch -> discard
182 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
183 -- makes it collapse entirely
184 (_, NodeHeader HeaderGreat{})
185 | col < pos_column bh -> collapse
186 -- NOTE: same for HeaderBar
187 (_, NodeHeader HeaderBar{})
188 | col < pos_column bh -> collapse
189 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
190 where
191 isAdjacent = pos_line bn - pos_line eo <= 1
192 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
193 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
194 where
195 isMatching (Cell bh _eh h) =
196 pos_column bn == pos_column bh &&
197 n == h
198 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
199 \_t0@(unTree -> c0@(Cell b0 _e0 _n0)) _t1@(unTree -> Cell b1 e1 _n1) ->
200 not (isMatching c0) &&
201 (pos_line b0 - pos_line e1 <= 1) && -- adjacent
202 col < pos_column b1 -- righter than col
203
204 -- | Merge by considering indentation.
205 mergeRowIndent :: Rows -> Row -> Rows
206 mergeRowIndent rows row =
207 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
208 case (row,rows) of
209 ([], _) -> rows
210 (_, []) -> undefined -- NOTE: cannot happen with initRows
211 ( new@(Tree (Cell bn en n) ns):news
212 ,old@(Tree (Cell bo eo o) os):olds ) ->
213 case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
214 debug0 "mergeRowIndent/colOld" (pos_column bo) of
215 -- NOTE: new is on the left
216 LT ->
217 case (n,o) of
218 -- NOTE: merge adjacent NodeText
219 -- first
220 -- second
221 (NodeText tn, NodeText to)
222 | TL.null tn || TL.null to
223 , not isVerbatim -> collapse
224 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
225 where
226 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
227 boNew = bo{pos_column=pos_column bn}
228 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
229 -- | Whether the horizontal delta is made of spaces
230 isIndented =
231 debug0 "mergeRowIndent/isIndented" $
232 case olds of
233 [] -> True
234 (unTree -> cell_end -> ep) : _ ->
235 case pos_line ep `compare` pos_line bo of
236 LT -> True
237 EQ -> pos_column ep <= pos_column bn
238 _ -> False
239 _ -> collapse
240 -- NOTE: new is vertically aligned
241 EQ ->
242 case (n,o) of
243 -- NOTE: preserve all NodeText "", but still split into two NodePara
244 (NodeText tn, NodeText to)
245 | TL.null tn || TL.null to
246 , not isVerbatim -> collapse
247 | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
248 -- NOTE: HeaderSection can parent Nodes at the same level
249 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
250 if debug0 "mergeRowIndent/lvlNew" lvlNew
251 > debug0 "mergeRowIndent/lvlOld" lvlOld
252 -- # old
253 -- ## new
254 then concat
255 -- ## old or # old
256 -- # new # new
257 else collapse
258 -- NOTE: old is no HeaderSection, then collapse to any older and loop
259 (NodeHeader HeaderSection{}, _)
260 -- | rows'@(sec:olds') <- collapseSection (pos_column bn) rows
261 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
262 , (unTree -> unCell -> NodeHeader HeaderSection{}) <- sec ->
263 mergeRowIndent rows' row
264 where
265 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
266 \_t0@(unTree -> Cell b0 _e0 n0) _t1 ->
267 case n0 of
268 NodeHeader HeaderSection{} -> False
269 _ -> pos_column bn == pos_column b0
270 -- NOTE: in case of alignment, HeaderSection is parent
271 (_, NodeHeader HeaderSection{}) -> concat
272 --
273 _ -> replace
274 -- NOTE: new is on the right
275 GT ->
276 case (n,o) of
277 -- NOTE: only same line Root can be pushed on HeaderBar
278 -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse
279 -- NOTE: keep NodeText "" out of old NodePara
280 (NodeText "", NodePara) -> collapse
281 -- NOTE: merge adjacent NodeText
282 (NodeText tn, NodeText to) ->
283 case isAdjacent of
284 _ | TL.null tn || TL.null to
285 , not isVerbatim -> collapse
286 -- old
287 -- new
288 True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
289 -- old
290 --
291 -- new
292 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
293 where
294 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
295 bnNew = bn{pos_column=pos_column bo}
296 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
297 --
298 _ -> concat
299 where
300 isAdjacent = pos_line bn - pos_line eo <= 1
301 -- | Whether a parent semantic want new to stay a NodeText
302 isVerbatim = any p rows
303 where
304 p (unTree -> unCell -> NodeHeader HeaderBar{}) = True
305 p _ = False
306 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
307 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
308 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
309 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
310
311 -- | Like 'mergeRowIndent', but without maintaining the appending,
312 -- hence collapsing all the 'Root's of the given 'Rows'.
313 --
314 -- NOTE: 'initRows' MUST have been the first 'Rows'
315 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
316 collapseRows :: Rows -> Roots
317 collapseRows rows =
318 debug1_ "collapseRows" ("rows",rows) $
319 case collapseRowsWhile (\_new _old -> True) rows of
320 [t] -> subTrees t
321 _ -> undefined
322 -- NOTE: subTrees returns the children of the updated initRows
323
324 -- | Collapse downto any last HeaderSection, returning it and its level.
325 collapseSection :: ColNum -> Rows -> Rows
326 collapseSection col = debug1 "collapseSection" "rows" go
327 where
328 go rows@(new@(unTree -> Cell bn _en n):olds)
329 | col <= pos_column bn =
330 case n of
331 NodeHeader HeaderSection{} -> rows
332 _ -> collapseSection col $ collapseRoot new $ go olds
333 go _ = mempty
334
335 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
336 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
337 [] -> mempty
338 rows@(new@(Tree (Cell bn _en n) _ns):news) ->
339 case news of
340 [] -> rows
341 old@(Tree (Cell bo eo o) _os):olds
342 | not $ test new old -> rows
343 | otherwise ->
344 case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare`
345 debug0 "collapseRowsWhile/colOld" (pos_column bo) of
346 -- NOTE: new is vertically aligned
347 EQ ->
348 case (n,o) of
349 -- NOTE: HeaderSection can parent Nodes at the same level
350 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
351 if debug0 "collapseRowsWhile/lvlNew" lvlNew
352 > debug0 "collapseRowsWhile/lvlOld" lvlOld
353 -- # old
354 -- ## new
355 then collapse
356 -- ## old or # old
357 -- # new # new
358 else
359 debug "collapseRowsWhile/replace" $
360 collapseRowsWhile test $ (new:) $ collapseRoot old olds
361 -- NOTE: old is no HeaderSection, then collapse to any older and loop
362 (NodeHeader HeaderSection{}, _)
363 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
364 , (unTree -> unCell -> NodeHeader HeaderSection{}) <- sec ->
365 collapseRowsWhile test news'
366 where
367 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
368 \_t0@(unTree -> Cell b0 _e0 n0) _t1 ->
369 case n0 of
370 NodeHeader HeaderSection{} -> False
371 _ -> pos_column bn == pos_column b0
372 -- NOTE: in case of alignment, HeaderSection is parent
373 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
374 -- NOTE: merge within old NodePara.
375 (_, NodePara) | isAdjacent -> collapse
376 --
377 _ -> collapse2
378 -- NOTE: new is either on the left or on the right
379 _ -> collapse
380 where
381 isAdjacent = pos_line bn - pos_line eo <= 1
382 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
383 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
384
385 -- | Put a 'Root' as a child of the head 'Root'.
386 --
387 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
388 -- NOTE: any NodeText/NodeText merging must have been done before.
389 collapseRoot :: Root -> Rows -> Rows
390 collapseRoot new@(Tree (Cell bn en n) _ns) rows =
391 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
392 case rows of
393 [] -> return new
394 old@(Tree (Cell bo eo o) os) : olds ->
395 case (n,o) of
396 -- NOTE: no child into NodeText
397 (_, NodeText{}) -> collapse2
398 -- NOTE: NodeText can begin a NodePara
399 (NodeText tn, _) | not $ TL.null tn ->
400 case o of
401 -- NOTE: no NodePara within those
402 NodeHeader HeaderEqual{} -> collapse
403 NodeHeader HeaderBar{} -> collapse
404 NodeHeader HeaderDashDash{} -> collapse
405 -- NOTE: NodePara within those
406 NodePara | not isAdjacent -> para
407 NodeHeader{} -> para
408 NodeGroup -> para
409 _ -> collapse
410 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
411 -- Note that since a NodePara is never adjacent to another,
412 -- it is not nested within another.
413 (_, NodePara)
414 | isAdjacent ->
415 case n of
416 -- NOTE: no HeaderSection (even adjacent) within a NodePara
417 NodeHeader HeaderSection{} -> collapse2
418 _ -> collapse
419 | otherwise -> collapse2
420 _ -> collapse
421 where
422 isAdjacent = pos_line bn - pos_line eo <= 1
423 para = Tree (Cell bn en NodePara) (return new) : rows
424 collapse = Tree (Cell bo en o) (os |> new) : olds
425 collapse2 = collapseRoot new $ collapseRoot old olds