1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ordering(..), Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence ((|>))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Prelude (undefined, Num(..))
23 import System.FilePath (FilePath)
24 import Text.Show (Show(..))
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
29 import Language.TCT.Utils
30 import Language.TCT.Cell
31 import Language.TCT.Elem
32 import Language.TCT.Debug
35 -- | A single 'Tree' to rule all the 'Node's
36 -- simplifies greatly the navigation and transformations,
37 -- especially because the later XML or DTC output
38 -- are themselves a single tree-like data structure.
40 -- Also, having a single 'Tree' is easier to merge
41 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
42 -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
44 -- For error reporting, each 'Node' is annotated with a 'Cell'
45 -- spanning over all its content (sub-'Trees' included).
46 type Root = Tree (Cell Node)
47 type Roots = Trees (Cell Node)
49 pattern Tree0 :: a -> Tree a
50 pattern Tree0 a <- Tree a (null -> True)
51 where Tree0 a = Tree a mempty
55 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
56 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
57 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
58 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
59 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
60 | NodePara -- ^ node, gather trees by paragraph,
61 -- useful to know when to generate a <para> XML node
67 = HeaderColon !Name !White -- ^ @name: @
68 | HeaderEqual !Name !White -- ^ @name=@
69 | HeaderBar !Name !White -- ^ @name|@
70 | HeaderGreat !Name !White -- ^ @name>@
71 | HeaderBrackets !Name -- ^ @[name]@
72 | HeaderDot !Name -- ^ @1. @
73 | HeaderDash -- ^ @- @
74 | HeaderDashDash -- ^ @-- @
75 | HeaderSection !LevelSection -- ^ @# @
76 | HeaderDotSlash !FilePath -- ^ @./file @
77 deriving (Eq, Ord, Show)
78 instance Pretty Header
83 -- ** Type 'LevelSection'
84 type LevelSection = Int
88 = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
89 | PairHash -- ^ @\#text#@
90 | PairStar -- ^ @*text*@
91 | PairSlash -- ^ @/text/@
92 | PairUnderscore -- ^ @_value_@
93 | PairDash -- ^ @-text-@
94 | PairBackquote -- ^ @`text`@
95 | PairSinglequote -- ^ @'text'@
96 | PairDoublequote -- ^ @"text"@
97 | PairFrenchquote -- ^ @«text»@
98 | PairParen -- ^ @(text)@
99 | PairBrace -- ^ @{text}@
100 | PairBracket -- ^ @[text]@
101 deriving (Eq,Ord,Show)
119 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
123 -- | In reverse order: a list of nodes in scope
124 -- (hence to which the next line can append to).
127 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
128 -- one can always put the last 'Root' as a child to a previous one.
129 -- This 'Root' just has to be discarded by 'collapseRows'.
131 initRows = [Tree0 $ Cell p p $ NodeHeader HeaderDash]
132 where p = pos1{pos_line= -1, pos_column=0}
133 -- NOTE: such that any following 'Root'
134 -- is 'NodePara' if possible, and always a child.
136 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
138 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
139 -- they MAY span over multilines, and they can be many from a single line.
140 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
141 -- they MUST span only over a single and entire line.
143 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
144 mergeRow :: Rows -> Row -> Rows
146 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
147 mergeRowPrefix 0 rows $ List.reverse row
149 -- | Merge by considering matching prefixes.
151 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
152 -- but on their vertical alignment as prefixes.
153 -- Hence, each new 'Row' has those prefixes zipped into a single one
154 -- when they match, are aligned and adjacent.
155 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
156 mergeRowPrefix col rows row =
157 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
160 (_, []) -> undefined -- NOTE: cannot happen with initRows
161 ( _new@(Tree (Cell bn _en n) _ns):news
162 , _old@(Tree (Cell _bo eo _o) _os):_olds ) ->
163 case collapseRowsWhile isCollapsable rows of
164 [] -> mergeRowIndent rows row
165 head@(unTree -> ch@(Cell bh _eh h)) : olds' ->
167 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
168 -- then check if there is a matching HeaderGreat,
169 -- if so, discard new and restart with a col advanced to new's beginning
170 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
171 | isAdjacent && isMatching ch -> discard
172 -- NOTE: same for HeaderBar
173 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
174 | isAdjacent && isMatching ch -> discard
175 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
176 -- makes it collapse entirely
177 (_, NodeHeader HeaderGreat{})
178 | col < pos_column bh -> collapse
179 -- NOTE: same for HeaderBar
180 (_, NodeHeader HeaderBar{})
181 | col < pos_column bh -> collapse
182 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
184 isAdjacent = pos_line bn - pos_line eo <= 1
185 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
186 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
188 isMatching (Cell bh _eh h) =
189 pos_column bn == pos_column bh &&
191 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
192 \_t0@(unTree -> c0@(Cell b0 _e0 _n0)) _t1@(unTree -> Cell b1 e1 _n1) ->
193 not (isMatching c0) &&
194 (pos_line b0 - pos_line e1 <= 1) && -- adjacent
195 col < pos_column b1 -- righter than col
197 -- | Merge by considering indentation.
198 mergeRowIndent :: Rows -> Row -> Rows
199 mergeRowIndent rows row =
200 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
203 (_, []) -> undefined -- NOTE: cannot happen with initRows
204 ( new@(Tree (Cell bn en n) ns):news
205 ,old@(Tree (Cell bo eo o) os):olds ) ->
206 case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
207 debug0 "mergeRowIndent/colOld" (pos_column bo) of
208 -- NOTE: new is on the left
211 -- NOTE: merge adjacent NodeText
214 (NodeText tn, NodeText to)
215 | TL.null tn || TL.null to
216 , not isVerbatim -> collapse
217 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
219 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
220 boNew = bo{pos_column=pos_column bn}
221 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
222 -- | Whether the horizontal delta is made of spaces
224 debug0 "mergeRowIndent/isIndented" $
227 (unTree -> (cell_end -> ep)) : _ ->
228 case pos_line ep `compare` pos_line bo of
230 EQ -> pos_column ep <= pos_column bn
233 -- NOTE: new is vertically aligned
236 -- NOTE: preserve all NodeText "", but still split into two NodePara
237 (NodeText tn, NodeText to)
238 | TL.null tn || TL.null to
239 , not isVerbatim -> collapse
240 | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
241 -- NOTE: HeaderSection can parent Nodes at the same level
242 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
243 if debug0 "mergeRowIndent/lvlNew" lvlNew
244 > debug0 "mergeRowIndent/lvlOld" lvlOld
251 -- NOTE: old is no HeaderSection, then collapse to any older and loop
252 (NodeHeader HeaderSection{}, _)
253 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
254 , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
255 mergeRowIndent rows' row
257 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
258 \_t0@(unTree -> Cell b0 _e0 n0) _t1 ->
260 NodeHeader HeaderSection{} -> False
261 _ -> pos_column bn == pos_column b0
262 -- NOTE: in case of alignment, HeaderSection is parent
263 (_, NodeHeader HeaderSection{}) -> concat
266 -- NOTE: new is on the right
269 -- NOTE: only same line Root can be pushed on HeaderBar
270 -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse
271 -- NOTE: keep NodeText "" out of old NodePara
272 (NodeText "", NodePara) -> collapse
273 -- NOTE: merge adjacent NodeText
274 (NodeText tn, NodeText to) ->
276 _ | TL.null tn || TL.null to
277 , not isVerbatim -> collapse
280 True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
284 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
286 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
287 bnNew = bn{pos_column=pos_column bo}
288 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
292 isAdjacent = pos_line bn - pos_line eo <= 1
293 -- | Whether a parent semantic want new to stay a NodeText
294 isVerbatim = any p rows
296 p (unTree -> (unCell -> NodeHeader HeaderBar{})) = True
298 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
299 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
300 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
301 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
303 -- | Like 'mergeRowIndent', but without maintaining the appending,
304 -- hence collapsing all the 'Root's of the given 'Rows'.
306 -- NOTE: 'initRows' MUST have been the first 'Rows'
307 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
308 collapseRows :: Rows -> Roots
310 debug1_ "collapseRows" ("rows",rows) $
311 case collapseRowsWhile (\_new _old -> True) rows of
314 -- NOTE: subTrees returns the children of the updated initRows
316 -- | Collapse downto any last HeaderSection, returning it and its level.
317 collapseSection :: ColNum -> Rows -> Rows
318 collapseSection col = debug1 "collapseSection" "rows" go
320 go rows@(new@(unTree -> Cell bn _en n):olds)
321 | col <= pos_column bn =
323 NodeHeader HeaderSection{} -> rows
324 _ -> collapseSection col $ collapseRoot new $ go olds
327 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
328 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
330 rows@(new@(Tree (Cell bn _en n) _ns):news) ->
333 old@(Tree (Cell bo eo o) _os):olds
334 | not $ test new old -> rows
336 case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare`
337 debug0 "collapseRowsWhile/colOld" (pos_column bo) of
338 -- NOTE: new is vertically aligned
341 -- NOTE: HeaderSection can parent Nodes at the same level
342 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
343 if debug0 "collapseRowsWhile/lvlNew" lvlNew
344 > debug0 "collapseRowsWhile/lvlOld" lvlOld
351 debug "collapseRowsWhile/replace" $
352 collapseRowsWhile test $ (new:) $ collapseRoot old olds
353 -- NOTE: old is no HeaderSection, then collapse to any older and loop
354 (NodeHeader HeaderSection{}, _)
355 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
356 , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
357 collapseRowsWhile test news'
359 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
360 \_t0@(unTree -> Cell b0 _e0 n0) _t1 ->
362 NodeHeader HeaderSection{} -> False
363 _ -> pos_column bn == pos_column b0
364 -- NOTE: in case of alignment, HeaderSection is parent
365 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
366 -- NOTE: merge within old NodePara.
367 (_, NodePara) | isAdjacent -> collapse
370 -- NOTE: new is either on the left or on the right
373 isAdjacent = pos_line bn - pos_line eo <= 1
374 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
375 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
377 -- | Put a 'Root' as a child of the head 'Root'.
379 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
381 -- NOTE: any NodeText/NodeText merging must have been done before.
382 collapseRoot :: Root -> Rows -> Rows
383 collapseRoot new@(Tree (Cell bn en n) _ns) rows =
384 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
387 old@(Tree (Cell bo eo o) os) : olds ->
389 -- NOTE: no child into NodeText
390 (_, NodeText{}) -> collapse2
391 -- NOTE: NodeText can begin a NodePara
392 (NodeText tn, _) | not $ TL.null tn ->
394 -- NOTE: no NodePara within those
395 NodeHeader HeaderEqual{} -> collapse
396 NodeHeader HeaderBar{} -> collapse
397 NodeHeader HeaderDashDash{} -> collapse
398 -- NOTE: NodePara within those
399 NodePara | not isAdjacent -> para
402 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
403 -- Note that since a NodePara is never adjacent to another,
404 -- it is not nested within another.
408 -- NOTE: no HeaderSection (even adjacent) within a NodePara
409 NodeHeader HeaderSection{} -> collapse2
411 | otherwise -> collapse2
414 isAdjacent = pos_line bn - pos_line eo <= 1
415 para = Tree (Cell bn en NodePara) (return new) : rows
416 collapse = Tree (Cell bo en o) (os |> new) : olds
417 collapse2 = collapseRoot new $ collapseRoot old olds