1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ordering(..), Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence ((|>), ViewR(..))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Prelude (undefined, Num(..))
23 import System.FilePath (FilePath)
24 import Text.Show (Show(..))
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
29 import Language.TCT.Utils
30 import Language.TCT.Cell
31 import Language.TCT.Elem
32 import Language.TCT.Debug
35 -- | A single 'Tree' to rule all the 'Node's
36 -- simplifies the navigation and transformations.
38 -- For error reporting, each 'Node' is annotated with a 'Cell'
39 -- spanning over all its content (sub-'Trees' included).
40 type Root = Tree (Cell Node)
41 type Roots = Trees (Cell Node)
43 pattern Tree0 :: a -> Tree a
44 pattern Tree0 a <- Tree a (null -> True)
45 where Tree0 a = Tree a mempty
49 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
50 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
51 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
52 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
53 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
54 | NodePara -- ^ node, gather trees by paragraph,
55 -- useful to know when to generate a <para> XML node
56 | NodeGroup -- ^ node, group trees into a single tree,
57 -- useful to return many trees when only one is expected
63 = HeaderColon !Name !White -- ^ @name: @
64 | HeaderEqual !Name !White -- ^ @name=@
65 | HeaderBar !Name !White -- ^ @name|@
66 | HeaderGreat !Name !White -- ^ @name>@
67 | HeaderBrackets !Name -- ^ @[name]@
68 | HeaderDot !Name -- ^ @1. @
69 | HeaderDash -- ^ @- @
70 | HeaderDashDash -- ^ @-- @
71 | HeaderSection !LevelSection -- ^ @# @
72 | HeaderDotSlash !FilePath -- ^ @./file @
73 deriving (Eq, Ord, Show)
74 instance Pretty Header
79 -- ** Type 'LevelSection'
80 type LevelSection = Int
84 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
85 | PairHash -- ^ @#value#@
86 | PairStar -- ^ @*value*@
87 | PairSlash -- ^ @/value/@
88 | PairUnderscore -- ^ @_value_@
89 | PairDash -- ^ @-value-@
90 | PairBackquote -- ^ @`value`@
91 | PairSinglequote -- ^ @'value'@
92 | PairDoublequote -- ^ @"value"@
93 | PairFrenchquote -- ^ @«value»@
94 | PairParen -- ^ @(value)@
95 | PairBrace -- ^ @{value}@
96 | PairBracket -- ^ @[value]@
97 deriving (Eq,Ord,Show)
115 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
119 -- | In reverse order: a list of nodes in scope
120 -- (hence to which the next line can append to).
123 -- | Having an initial 'Root' simplifies 'appendRow':
124 -- one can always put the last 'Root' as a child to a previous one.
125 -- This 'Root' just has to be discarded by 'collapseRows'.
127 initRows = [Tree0 (Cell p p NodeGroup)]
128 where p = pos1{pos_line= -1, pos_column=0}
129 -- NOTE: such that any following 'Root'
130 -- is 'NodePara' if possible, and always a child.
132 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
134 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
135 -- they MAY span over multilines, and they can be many from a single line.
136 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
137 -- they MUST span only over a single and entire line.
139 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
140 mergeRow :: Rows -> Row -> Rows
142 debug2_ "mergeRow" ("news",row) ("olds",rows) $
143 zipRow 0 rows $ List.reverse row
145 -- | 'HeaderGreat' and 'HeaderBar' work, not on indentation,
146 -- but on their vertical alignment as prefixes.
147 -- Hence, each new 'Row' has those prefixes zipped into a single one
148 -- when they match and are aligned.
149 zipRow :: ColNum -> Rows -> Row -> Rows
150 zipRow col rows row =
151 debug3_ "zipRow" ("col",col) ("news",row) ("olds",rows) $
154 (_, []) -> undefined -- NOTE: cannot happen with initRows
155 ( _new@(Tree (Cell bn _en n) _ns):news
156 , _old@(Tree (Cell _bo eo _o) _os):_olds ) ->
157 case collapseRowsWhile isCollapsable rows of
158 [] -> appendRow rows row
159 head@(unTree -> Cell bh _eh h) : olds' ->
161 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
162 -- then check if there is a matching HeaderGreat,
163 -- if so, discard new and restart with a col advanced to new's beginning
164 (NodeHeader hn@HeaderGreat{}, NodeHeader hh@HeaderGreat{})
165 | pos_column bn == pos_column bh
167 , hn == hh -> discard
168 -- NOTE: same for HeaderBar
169 (NodeHeader hn@HeaderBar{}, NodeHeader hh@HeaderBar{})
170 | pos_column bn == pos_column bh
172 , hn == hh -> discard
173 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
174 -- makes it collapse entirely
175 (_, NodeHeader HeaderGreat{})
176 | col < pos_column bh -> collapse
177 -- NOTE: same for HeaderBar
178 (_, NodeHeader HeaderBar{})
179 | col < pos_column bh -> collapse
180 _ -> debug "zipRow/append" $ appendRow rows row
182 discard = debug "zipRow/discard" $ zipRow (pos_column bh) rows news
183 collapse = debug "zipRow/collapse" $ zipRow col (collapseRoot head olds') row
184 isAdjacent = pos_line bn - pos_line eo <= 1
186 isCollapsable = -- debug2 "zipRow/isCollapsable" "new" "old" $
187 \_new@(unTree -> Cell bn _en _n) _old@(unTree -> Cell bo eo _o) ->
188 (pos_line bn - pos_line eo <= 1) && -- adjacent
189 col < pos_column bo -- righter than col
191 appendRow :: Rows -> Row -> Rows
193 debug2_ "appendRow" ("news",row) ("olds",rows) $
196 (_, []) -> undefined -- NOTE: cannot happen with initRows
197 ( new@(Tree (Cell bn en n) ns):news
198 ,old@(Tree (Cell bo eo o) os):olds ) ->
199 case debug0 "appendRow/colNew" (pos_column bn) `compare`
200 debug0 "appendRow/colOld" (pos_column bo) of
201 -- NOTE: new is on the left
204 -- NOTE: merge adjacent NodeText
207 (NodeText tn, NodeText to)
208 | TL.null tn || TL.null to
209 , not isVerbatim -> collapse
210 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
212 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
213 boNew = bo{pos_column=pos_column bn}
214 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
215 -- | Whether the horizontal delta is made of spaces
217 debug0 "appendRow/isIndented" $
220 (unTree -> cell_end -> ep) : _ ->
221 case pos_line ep `compare` pos_line bo of
223 EQ -> pos_column ep <= pos_column bn
226 -- NOTE: new is vertically aligned
229 -- NOTE: preserve all NodeText "", but still split into two NodePara
230 (NodeText tn, NodeText to)
231 | TL.null tn || TL.null to
232 , not isVerbatim -> collapse
233 | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
234 -- NOTE: HeaderSection can parent Nodes at the same level
235 (NodeHeader (HeaderSection lvlNew), _)
236 | rows'@(old':olds') <- collapseRowsWhile isCollapsable rows
237 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- old' ->
238 if debug0 "appendRow/lvlNew" lvlNew
239 > debug0 "appendRow/lvlOld" lvlOld
242 {-concat using old'-} List.reverse row <> rows'
243 else -- ## old' or # old'
245 {-collapse using old'-} appendRow (collapseRoot old' olds') row
247 isCollapsable = -- debug2 "appendRow/isCollapsable" "new" "old" $
248 \_new _old@(unTree -> Cell bt _et t) ->
250 NodeHeader HeaderSection{} -> False
251 _ -> pos_column bt == pos_column bn
252 -- NOTE: in case of alignment, HeaderSection is parent
253 (_, NodeHeader HeaderSection{}) -> concat
256 -- NOTE: new is on the right
259 -- NOTE: only same line Root can be pushed on HeaderBar
260 -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse
261 -- NOTE: keep NodeText "" out of old NodePara
262 (NodeText "", NodePara) -> collapse
263 -- NOTE: merge adjacent NodeText
264 (NodeText tn, NodeText to) ->
266 _ | TL.null tn || TL.null to
267 , not isVerbatim -> collapse
270 True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
274 False -> appendRow (collapseRoot old olds) (shifted:news)
276 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
277 bnNew = bn{pos_column=pos_column bo}
278 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
282 isAdjacent = pos_line bn - pos_line eo <= 1
283 -- | Whether a parent semantic want new to stay a NodeText
284 isVerbatim = any p rows
286 p (unTree -> unCell -> NodeHeader HeaderBar{}) = True
288 concat = debug "appendRow/concat" $ List.reverse row <> rows
289 merge m = debug "appendRow/merge" $ appendRow (m : olds) news
290 collapse = debug "appendRow/collapse" $ appendRow (collapseRoot old olds) row
291 replace = debug "appendRow/replace" $ appendRow (new : collapseRoot old olds) news
293 -- | Like 'appendRow', but without maintaining the appending,
294 -- hence collapsing all the 'Root's of the given 'Rows'.
296 -- NOTE: 'initRows' MUST have been the first 'Rows'
297 -- before calling 'appendRow' on it to get the given 'Rows'.
298 collapseRows :: Rows -> Roots
300 case collapseRowsWhile (\_new _old -> True) rows of
303 -- NOTE: subTrees returns the children of the updated initRows
305 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
306 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
308 rows@(new@(Tree (Cell bn _en n) _ns):olds) ->
312 old@(Tree (Cell bo eo o) _os):oldss
313 | not $ test new old -> rows
315 case debug0 "colNew" (pos_column bn) `compare`
316 debug0 "colOld" (pos_column bo) of
317 -- NOTE: new is vertically aligned
320 -- NOTE: HeaderSection can parent Nodes at the same level
321 (NodeHeader (HeaderSection lvlNew), _)
322 | old':olds' <- collapseRowsWhile isCollapsable olds
323 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- old' ->
324 if debug0 "collapseRowsWhile/lvlNew" lvlNew
325 > debug0 "collapseRowsWhile/lvlOld" lvlOld
328 collapseRowsWhile test $ collapseRoot new $ old':olds'
329 else -- ## old' or # old'
331 collapseRowsWhile test $ new:collapseRoot old' olds'
334 \_new _old@(unTree -> Cell bt _et t) ->
336 NodeHeader HeaderSection{} -> False
337 _ -> pos_column bt == pos_column bn
338 -- NOTE: in case of alignment, HeaderSection is parent
339 (_, NodeHeader HeaderSection{}) -> collapse
340 -- NOTE: merge within old NodePara.
341 (_, NodePara{}) | isAdjacent -> collapse
344 -- NOTE: new is either on the left or on the right
347 isAdjacent = pos_line bn - pos_line eo <= 1
348 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new olds
349 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old oldss
351 -- | Put a 'Root' as a child of the head 'Root'.
353 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
354 -- NOTE: any NodeText/NodeText merging must have been done before.
355 collapseRoot :: Root -> Rows -> Rows
356 collapseRoot new@(Tree (Cell bn en n) _ns) rows =
357 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
360 old@(Tree (Cell bo eo o) os) : olds ->
362 -- NOTE: never put a NodePara directly within another
363 (NodePara, NodePara) -> collapse2
364 -- NOTE: never put a collapse to NodeText, except some NodeHeader to preserve them
365 (_, NodeText{}) -> collapse2
366 -- NOTE: NodeText can begin a NodePara
367 (NodeText tn, _) | not $ TL.null tn ->
369 -- NOTE: no NodePara within those
370 NodeHeader HeaderEqual{} -> collapse
371 NodeHeader HeaderBar{} -> collapse
372 NodeHeader HeaderDashDash{} -> collapse
373 -- NOTE: NodePara within those
374 NodePara | not isAdjacent -> para
380 isAdjacent = pos_line bn - pos_line eo <= 1
381 para = Tree (Cell bn en NodePara) (return new) : rows
382 collapse = Tree (Cell bo en o) (os |> new) : olds
383 collapse2 = collapseRoot new $ collapseRoot old olds
385 -- | Return a 'Tree' from a 'Cell' node and 'subTrees',
386 -- while adjusting the node's 'cell_end'
387 -- with the last 'Tree' of the 'subTrees'.
388 tree :: Cell a -> Trees (Cell a) -> Tree (Cell a)
389 tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts
391 ep' = case Seq.viewr ts of
393 _ :> (unTree -> cell_end -> p) -> p