1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ordering(..), Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence ((|>))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Prelude (undefined, Num(..))
23 import System.FilePath (FilePath)
24 import Text.Show (Show(..))
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
29 import Language.TCT.Utils
30 import Language.TCT.Cell
31 import Language.TCT.Elem
32 import Language.TCT.Debug
35 -- | A single 'Tree' to rule all the 'Node's
36 -- simplifies greatly the navigation and transformations,
37 -- especially because the later XML or DTC output
38 -- are themselves a single tree-like data structure.
40 -- Also, having a single 'Tree' is easier to merge
41 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
42 -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
44 -- For error reporting, each 'Node' is annotated with a 'Cell'
45 -- spanning over all its content (sub-'Trees' included).
46 type Root = Tree (Cell Node)
47 type Roots = Trees (Cell Node)
49 pattern Tree0 :: a -> Tree a
50 pattern Tree0 a <- Tree a (null -> True)
51 where Tree0 a = Tree a mempty
55 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
56 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
57 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
58 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
59 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
60 | NodePara -- ^ node, gather trees by paragraph,
61 -- useful to know when to generate a <para> XML node
62 | NodeGroup -- ^ node, group trees into a single tree,
63 -- useful to return many trees when only one is expected
69 = HeaderColon !Name !White -- ^ @name: @
70 | HeaderEqual !Name !White -- ^ @name=@
71 | HeaderBar !Name !White -- ^ @name|@
72 | HeaderGreat !Name !White -- ^ @name>@
73 | HeaderBrackets !Name -- ^ @[name]@
74 | HeaderDot !Name -- ^ @1. @
75 | HeaderDash -- ^ @- @
76 | HeaderDashDash -- ^ @-- @
77 | HeaderSection !LevelSection -- ^ @# @
78 | HeaderDotSlash !FilePath -- ^ @./file @
79 deriving (Eq, Ord, Show)
80 instance Pretty Header
85 -- ** Type 'LevelSection'
86 type LevelSection = Int
90 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
91 | PairHash -- ^ @#value#@
92 | PairStar -- ^ @*value*@
93 | PairSlash -- ^ @/value/@
94 | PairUnderscore -- ^ @_value_@
95 | PairDash -- ^ @-value-@
96 | PairBackquote -- ^ @`value`@
97 | PairSinglequote -- ^ @'value'@
98 | PairDoublequote -- ^ @"value"@
99 | PairFrenchquote -- ^ @«value»@
100 | PairParen -- ^ @(value)@
101 | PairBrace -- ^ @{value}@
102 | PairBracket -- ^ @[value]@
103 deriving (Eq,Ord,Show)
121 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
125 -- | In reverse order: a list of nodes in scope
126 -- (hence to which the next line can append to).
129 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
130 -- one can always put the last 'Root' as a child to a previous one.
131 -- This 'Root' just has to be discarded by 'collapseRows'.
133 initRows = [Tree0 (Cell p p NodeGroup)]
134 where p = pos1{pos_line= -1, pos_column=0}
135 -- NOTE: such that any following 'Root'
136 -- is 'NodePara' if possible, and always a child.
138 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
140 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
141 -- they MAY span over multilines, and they can be many from a single line.
142 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
143 -- they MUST span only over a single and entire line.
145 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
146 mergeRow :: Rows -> Row -> Rows
148 debug2_ "mergeRow" ("news",row) ("olds",rows) $
149 mergeRowPrefix 0 rows $ List.reverse row
151 -- | Merge by considering matching prefixes.
153 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
154 -- but on their vertical alignment as prefixes.
155 -- Hence, each new 'Row' has those prefixes zipped into a single one
156 -- when they match, are aligned and adjacent.
157 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
158 mergeRowPrefix col rows row =
159 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
162 (_, []) -> undefined -- NOTE: cannot happen with initRows
163 ( _new@(Tree (Cell bn _en n) _ns):news
164 , _old@(Tree (Cell _bo eo _o) _os):_olds ) ->
165 case collapseRowsWhile isCollapsable rows of
166 [] -> mergeRowIndent rows row
167 head@(unTree -> Cell bh _eh h) : olds' ->
169 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
170 -- then check if there is a matching HeaderGreat,
171 -- if so, discard new and restart with a col advanced to new's beginning
172 (NodeHeader hn@HeaderGreat{}, NodeHeader hh@HeaderGreat{})
173 | pos_column bn == pos_column bh
175 , hn == hh -> discard
176 -- NOTE: same for HeaderBar
177 (NodeHeader hn@HeaderBar{}, NodeHeader hh@HeaderBar{})
178 | pos_column bn == pos_column bh
180 , hn == hh -> discard
181 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
182 -- makes it collapse entirely
183 (_, NodeHeader HeaderGreat{})
184 | col < pos_column bh -> collapse
185 -- NOTE: same for HeaderBar
186 (_, NodeHeader HeaderBar{})
187 | col < pos_column bh -> collapse
188 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
190 isAdjacent = pos_line bn - pos_line eo <= 1
191 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
192 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
194 isCollapsable = -- debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
195 \_new@(unTree -> Cell bn _en _n) _old@(unTree -> Cell bo eo _o) ->
196 (pos_line bn - pos_line eo <= 1) && -- adjacent
197 col < pos_column bo -- righter than col
199 -- | Merge by considering indentation.
200 mergeRowIndent :: Rows -> Row -> Rows
201 mergeRowIndent rows row =
202 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
205 (_, []) -> undefined -- NOTE: cannot happen with initRows
206 ( new@(Tree (Cell bn en n) ns):news
207 ,old@(Tree (Cell bo eo o) os):olds ) ->
208 case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
209 debug0 "mergeRowIndent/colOld" (pos_column bo) of
210 -- NOTE: new is on the left
213 -- NOTE: merge adjacent NodeText
216 (NodeText tn, NodeText to)
217 | TL.null tn || TL.null to
218 , not isVerbatim -> collapse
219 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
221 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
222 boNew = bo{pos_column=pos_column bn}
223 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
224 -- | Whether the horizontal delta is made of spaces
226 debug0 "mergeRowIndent/isIndented" $
229 (unTree -> cell_end -> ep) : _ ->
230 case pos_line ep `compare` pos_line bo of
232 EQ -> pos_column ep <= pos_column bn
235 -- NOTE: new is vertically aligned
238 -- NOTE: preserve all NodeText "", but still split into two NodePara
239 (NodeText tn, NodeText to)
240 | TL.null tn || TL.null to
241 , not isVerbatim -> collapse
242 | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
243 -- NOTE: HeaderSection can parent Nodes at the same level
244 (NodeHeader (HeaderSection lvlNew), _)
245 | rows'@(sec:olds') <- collapseRowsWhile isCollapsable rows
246 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- sec ->
247 if debug0 "mergeRowIndent/lvlNew" lvlNew
248 > debug0 "mergeRowIndent/lvlOld" lvlOld
251 {-concat using sec-} List.reverse row <> rows'
252 else -- ## sec or # sec
254 {-collapse using sec-} mergeRowIndent (collapseRoot sec olds') row
256 isCollapsable = -- debug2 "mergeRowIndent/isCollapsable" "new" "old" $
257 \_new _old@(unTree -> Cell bt _et t) ->
259 NodeHeader HeaderSection{} -> False
260 _ -> pos_column bt == pos_column bn
261 -- NOTE: in case of alignment, HeaderSection is parent
262 (_, NodeHeader HeaderSection{}) -> concat
265 -- NOTE: new is on the right
268 -- NOTE: only same line Root can be pushed on HeaderBar
269 -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse
270 -- NOTE: keep NodeText "" out of old NodePara
271 (NodeText "", NodePara) -> collapse
272 -- NOTE: merge adjacent NodeText
273 (NodeText tn, NodeText to) ->
275 _ | TL.null tn || TL.null to
276 , not isVerbatim -> collapse
279 True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
283 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
285 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
286 bnNew = bn{pos_column=pos_column bo}
287 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
291 isAdjacent = pos_line bn - pos_line eo <= 1
292 -- | Whether a parent semantic want new to stay a NodeText
293 isVerbatim = any p rows
295 p (unTree -> unCell -> NodeHeader HeaderBar{}) = True
297 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
298 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
299 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
300 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
302 -- | Like 'mergeRowIndent', but without maintaining the appending,
303 -- hence collapsing all the 'Root's of the given 'Rows'.
305 -- NOTE: 'initRows' MUST have been the first 'Rows'
306 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
307 collapseRows :: Rows -> Roots
309 case collapseRowsWhile (\_new _old -> True) rows of
312 -- NOTE: subTrees returns the children of the updated initRows
314 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
315 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
317 rows@(new@(Tree (Cell bn _en n) _ns):news) ->
320 old@(Tree (Cell bo eo o) _os):olds
321 | not $ test new old -> rows
323 case debug0 "colNew" (pos_column bn) `compare`
324 debug0 "colOld" (pos_column bo) of
325 -- NOTE: new is vertically aligned
328 -- NOTE: HeaderSection can parent Nodes at the same level
329 (NodeHeader (HeaderSection lvlNew), _)
330 | sec:olds' <- collapseRowsWhile isCollapsable news
331 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- sec ->
332 if debug0 "collapseRowsWhile/lvlNew" lvlNew
333 > debug0 "collapseRowsWhile/lvlOld" lvlOld
336 collapseRowsWhile test $ collapseRoot new $ sec:olds'
337 else -- ## sec or # sec
339 collapseRowsWhile test $ new:collapseRoot sec olds'
342 \_new _old@(unTree -> Cell bt _et t) ->
344 NodeHeader HeaderSection{} -> False
345 _ -> pos_column bt == pos_column bn
346 -- NOTE: in case of alignment, HeaderSection is parent
347 (_, NodeHeader HeaderSection{}) -> collapse
348 -- NOTE: merge within old NodePara.
349 (_, NodePara{}) | isAdjacent -> collapse
352 -- NOTE: new is either on the left or on the right
355 isAdjacent = pos_line bn - pos_line eo <= 1
356 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new news
357 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old olds
359 -- | Put a 'Root' as a child of the head 'Root'.
361 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
362 -- NOTE: any NodeText/NodeText merging must have been done before.
363 collapseRoot :: Root -> Rows -> Rows
364 collapseRoot new@(Tree (Cell bn en n) _ns) rows =
365 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
368 old@(Tree (Cell bo eo o) os) : olds ->
370 -- NOTE: never put a child into NodeText
371 (_, NodeText{}) -> collapse2
372 -- NOTE: NodeText can begin a NodePara
373 (NodeText tn, _) | not $ TL.null tn ->
375 -- NOTE: no NodePara within those
376 NodeHeader HeaderEqual{} -> collapse
377 NodeHeader HeaderBar{} -> collapse
378 NodeHeader HeaderDashDash{} -> collapse
379 -- NOTE: NodePara within those
380 NodePara | not isAdjacent -> para
384 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
385 -- Note that since a NodePara is never adjacent to another,
386 -- it is not nested within into another.
387 -- Note that an adjacent HeaderSection can enter a NodePara.
388 (_, NodePara) | isAdjacent -> collapse
389 | otherwise -> collapse2
392 isAdjacent = pos_line bn - pos_line eo <= 1
393 para = Tree (Cell bn en NodePara) (return new) : rows
394 collapse = Tree (Cell bo en o) (os |> new) : olds
395 collapse2 = collapseRoot new $ collapseRoot old olds