]> Git — Sourcephile - doclang.git/blob - Language/TCT/Tree.hs
Fix writing TCT to XML.
[doclang.git] / Language / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ordering(..), Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence ((|>))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Prelude (undefined, Num(..))
23 import System.FilePath (FilePath)
24 import Text.Show (Show(..))
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
28
29 import Language.TCT.Utils
30 import Language.TCT.Cell
31 import Language.TCT.Elem
32 import Language.TCT.Debug
33
34 -- * Type 'Root'
35 -- | A single 'Tree' to rule all the 'Node's
36 -- simplifies greatly the navigation and transformations,
37 -- especially because the later XML or DTC output
38 -- are themselves a single tree-like data structure.
39 --
40 -- Also, having a single 'Tree' is easier to merge
41 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
42 -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
43 --
44 -- For error reporting, each 'Node' is annotated with a 'Cell'
45 -- spanning over all its content (sub-'Trees' included).
46 type Root = Tree (Cell Node)
47 type Roots = Trees (Cell Node)
48
49 pattern Tree0 :: a -> Tree a
50 pattern Tree0 a <- Tree a (null -> True)
51 where Tree0 a = Tree a mempty
52
53 -- * Type 'Node'
54 data Node
55 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
56 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
57 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
58 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
59 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
60 | NodePara -- ^ node, gather trees by paragraph,
61 -- useful to know when to generate a <para> XML node
62 | NodeGroup -- ^ node, group trees into a single tree,
63 -- useful to return many trees when only one is expected
64 deriving (Eq,Show)
65 instance Pretty Node
66
67 -- * Type 'Header'
68 data Header
69 = HeaderColon !Name !White -- ^ @name: @
70 | HeaderEqual !Name !White -- ^ @name=@
71 | HeaderBar !Name !White -- ^ @name|@
72 | HeaderGreat !Name !White -- ^ @name>@
73 | HeaderBrackets !Name -- ^ @[name]@
74 | HeaderDot !Name -- ^ @1. @
75 | HeaderDash -- ^ @- @
76 | HeaderDashDash -- ^ @-- @
77 | HeaderSection !LevelSection -- ^ @# @
78 | HeaderDotSlash !FilePath -- ^ @./file @
79 deriving (Eq, Ord, Show)
80 instance Pretty Header
81
82 -- ** Type 'Name'
83 type Name = TL.Text
84
85 -- ** Type 'LevelSection'
86 type LevelSection = Int
87
88 -- * Type 'Pair'
89 data Pair
90 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
91 | PairHash -- ^ @#value#@
92 | PairStar -- ^ @*value*@
93 | PairSlash -- ^ @/value/@
94 | PairUnderscore -- ^ @_value_@
95 | PairDash -- ^ @-value-@
96 | PairBackquote -- ^ @`value`@
97 | PairSinglequote -- ^ @'value'@
98 | PairDoublequote -- ^ @"value"@
99 | PairFrenchquote -- ^ @«value»@
100 | PairParen -- ^ @(value)@
101 | PairBrace -- ^ @{value}@
102 | PairBracket -- ^ @[value]@
103 deriving (Eq,Ord,Show)
104 instance Pretty Pair
105
106 -- * Type 'Token'
107 data Token
108 = TokenText !TL.Text
109 | TokenEscape !Char
110 | TokenLink !Link
111 | TokenTag !Tag
112 deriving (Eq,Show)
113
114 -- ** Type 'Tag'
115 type Tag = TL.Text
116
117 -- ** Type 'Link'
118 type Link = TL.Text
119
120 -- * Type 'Row'
121 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
122 type Row = [Root]
123
124 -- ** Type 'Rows'
125 -- | In reverse order: a list of nodes in scope
126 -- (hence to which the next line can append to).
127 type Rows = [Root]
128
129 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
130 -- one can always put the last 'Root' as a child to a previous one.
131 -- This 'Root' just has to be discarded by 'collapseRows'.
132 initRows :: Rows
133 initRows = [Tree0 (Cell p p NodeGroup)]
134 where p = pos1{pos_line= -1, pos_column=0}
135 -- NOTE: such that any following 'Root'
136 -- is 'NodePara' if possible, and always a child.
137
138 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
139 --
140 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
141 -- they MAY span over multilines, and they can be many from a single line.
142 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
143 -- they MUST span only over a single and entire line.
144 --
145 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
146 mergeRow :: Rows -> Row -> Rows
147 mergeRow rows row =
148 debug2_ "mergeRow" ("news",row) ("olds",rows) $
149 mergeRowPrefix 0 rows $ List.reverse row
150
151 -- | Merge by considering matching prefixes.
152 --
153 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
154 -- but on their vertical alignment as prefixes.
155 -- Hence, each new 'Row' has those prefixes zipped into a single one
156 -- when they match, are aligned and adjacent.
157 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
158 mergeRowPrefix col rows row =
159 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
160 case (row,rows) of
161 ([], _) -> rows
162 (_, []) -> undefined -- NOTE: cannot happen with initRows
163 ( _new@(Tree (Cell bn _en n) _ns):news
164 , _old@(Tree (Cell _bo eo _o) _os):_olds ) ->
165 case collapseRowsWhile isCollapsable rows of
166 [] -> mergeRowIndent rows row
167 head@(unTree -> Cell bh _eh h) : olds' ->
168 case (n,h) of
169 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
170 -- then check if there is a matching HeaderGreat,
171 -- if so, discard new and restart with a col advanced to new's beginning
172 (NodeHeader hn@HeaderGreat{}, NodeHeader hh@HeaderGreat{})
173 | pos_column bn == pos_column bh
174 , isAdjacent
175 , hn == hh -> discard
176 -- NOTE: same for HeaderBar
177 (NodeHeader hn@HeaderBar{}, NodeHeader hh@HeaderBar{})
178 | pos_column bn == pos_column bh
179 , isAdjacent
180 , hn == hh -> discard
181 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
182 -- makes it collapse entirely
183 (_, NodeHeader HeaderGreat{})
184 | col < pos_column bh -> collapse
185 -- NOTE: same for HeaderBar
186 (_, NodeHeader HeaderBar{})
187 | col < pos_column bh -> collapse
188 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
189 where
190 isAdjacent = pos_line bn - pos_line eo <= 1
191 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
192 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
193 where
194 isCollapsable = -- debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
195 \_new@(unTree -> Cell bn _en _n) _old@(unTree -> Cell bo eo _o) ->
196 (pos_line bn - pos_line eo <= 1) && -- adjacent
197 col < pos_column bo -- righter than col
198
199 -- | Merge by considering indentation.
200 mergeRowIndent :: Rows -> Row -> Rows
201 mergeRowIndent rows row =
202 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
203 case (row,rows) of
204 ([], _) -> rows
205 (_, []) -> undefined -- NOTE: cannot happen with initRows
206 ( new@(Tree (Cell bn en n) ns):news
207 ,old@(Tree (Cell bo eo o) os):olds ) ->
208 case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
209 debug0 "mergeRowIndent/colOld" (pos_column bo) of
210 -- NOTE: new is on the left
211 LT ->
212 case (n,o) of
213 -- NOTE: merge adjacent NodeText
214 -- first
215 -- second
216 (NodeText tn, NodeText to)
217 | TL.null tn || TL.null to
218 , not isVerbatim -> collapse
219 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
220 where
221 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
222 boNew = bo{pos_column=pos_column bn}
223 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
224 -- | Whether the horizontal delta is made of spaces
225 isIndented =
226 debug0 "mergeRowIndent/isIndented" $
227 case olds of
228 [] -> True
229 (unTree -> cell_end -> ep) : _ ->
230 case pos_line ep `compare` pos_line bo of
231 LT -> True
232 EQ -> pos_column ep <= pos_column bn
233 _ -> False
234 _ -> collapse
235 -- NOTE: new is vertically aligned
236 EQ ->
237 case (n,o) of
238 -- NOTE: preserve all NodeText "", but still split into two NodePara
239 (NodeText tn, NodeText to)
240 | TL.null tn || TL.null to
241 , not isVerbatim -> collapse
242 | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
243 -- NOTE: HeaderSection can parent Nodes at the same level
244 (NodeHeader (HeaderSection lvlNew), _)
245 | rows'@(sec:olds') <- collapseRowsWhile isCollapsable rows
246 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- sec ->
247 if debug0 "mergeRowIndent/lvlNew" lvlNew
248 > debug0 "mergeRowIndent/lvlOld" lvlOld
249 then -- # sec
250 -- ## new
251 {-concat using sec-} List.reverse row <> rows'
252 else -- ## sec or # sec
253 -- # new # new
254 {-collapse using sec-} mergeRowIndent (collapseRoot sec olds') row
255 where
256 isCollapsable = -- debug2 "mergeRowIndent/isCollapsable" "new" "old" $
257 \_new _old@(unTree -> Cell bt _et t) ->
258 case t of
259 NodeHeader HeaderSection{} -> False
260 _ -> pos_column bt == pos_column bn
261 -- NOTE: in case of alignment, HeaderSection is parent
262 (_, NodeHeader HeaderSection{}) -> concat
263 --
264 _ -> replace
265 -- NOTE: new is on the right
266 GT ->
267 case (n,o) of
268 -- NOTE: only same line Root can be pushed on HeaderBar
269 -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse
270 -- NOTE: keep NodeText "" out of old NodePara
271 (NodeText "", NodePara) -> collapse
272 -- NOTE: merge adjacent NodeText
273 (NodeText tn, NodeText to) ->
274 case isAdjacent of
275 _ | TL.null tn || TL.null to
276 , not isVerbatim -> collapse
277 -- old
278 -- new
279 True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
280 -- old
281 --
282 -- new
283 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
284 where
285 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
286 bnNew = bn{pos_column=pos_column bo}
287 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
288 --
289 _ -> concat
290 where
291 isAdjacent = pos_line bn - pos_line eo <= 1
292 -- | Whether a parent semantic want new to stay a NodeText
293 isVerbatim = any p rows
294 where
295 p (unTree -> unCell -> NodeHeader HeaderBar{}) = True
296 p _ = False
297 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
298 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
299 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
300 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
301
302 -- | Like 'mergeRowIndent', but without maintaining the appending,
303 -- hence collapsing all the 'Root's of the given 'Rows'.
304 --
305 -- NOTE: 'initRows' MUST have been the first 'Rows'
306 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
307 collapseRows :: Rows -> Roots
308 collapseRows rows =
309 case collapseRowsWhile (\_new _old -> True) rows of
310 [t] -> subTrees t
311 _ -> undefined
312 -- NOTE: subTrees returns the children of the updated initRows
313
314 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
315 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
316 [] -> mempty
317 rows@(new@(Tree (Cell bn _en n) _ns):news) ->
318 case news of
319 [] -> rows
320 old@(Tree (Cell bo eo o) _os):olds
321 | not $ test new old -> rows
322 | otherwise ->
323 case debug0 "colNew" (pos_column bn) `compare`
324 debug0 "colOld" (pos_column bo) of
325 -- NOTE: new is vertically aligned
326 EQ ->
327 case (n,o) of
328 -- NOTE: HeaderSection can parent Nodes at the same level
329 (NodeHeader (HeaderSection lvlNew), _)
330 | sec:olds' <- collapseRowsWhile isCollapsable news
331 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- sec ->
332 if debug0 "collapseRowsWhile/lvlNew" lvlNew
333 > debug0 "collapseRowsWhile/lvlOld" lvlOld
334 then -- # sec
335 -- ## new
336 collapseRowsWhile test $ collapseRoot new $ sec:olds'
337 else -- ## sec or # sec
338 -- # new # new
339 collapseRowsWhile test $ new:collapseRoot sec olds'
340 where
341 isCollapsable =
342 \_new _old@(unTree -> Cell bt _et t) ->
343 case t of
344 NodeHeader HeaderSection{} -> False
345 _ -> pos_column bt == pos_column bn
346 -- NOTE: in case of alignment, HeaderSection is parent
347 (_, NodeHeader HeaderSection{}) -> collapse
348 -- NOTE: merge within old NodePara.
349 (_, NodePara{}) | isAdjacent -> collapse
350 --
351 _ -> collapse2
352 -- NOTE: new is either on the left or on the right
353 _ -> collapse
354 where
355 isAdjacent = pos_line bn - pos_line eo <= 1
356 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new news
357 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old olds
358
359 -- | Put a 'Root' as a child of the head 'Root'.
360 --
361 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
362 -- NOTE: any NodeText/NodeText merging must have been done before.
363 collapseRoot :: Root -> Rows -> Rows
364 collapseRoot new@(Tree (Cell bn en n) _ns) rows =
365 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
366 case rows of
367 [] -> return new
368 old@(Tree (Cell bo eo o) os) : olds ->
369 case (n,o) of
370 -- NOTE: never put a child into NodeText
371 (_, NodeText{}) -> collapse2
372 -- NOTE: NodeText can begin a NodePara
373 (NodeText tn, _) | not $ TL.null tn ->
374 case o of
375 -- NOTE: no NodePara within those
376 NodeHeader HeaderEqual{} -> collapse
377 NodeHeader HeaderBar{} -> collapse
378 NodeHeader HeaderDashDash{} -> collapse
379 -- NOTE: NodePara within those
380 NodePara | not isAdjacent -> para
381 NodeHeader{} -> para
382 NodeGroup -> para
383 _ -> collapse
384 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
385 -- Note that since a NodePara is never adjacent to another,
386 -- it is not nested within into another.
387 -- Note that an adjacent HeaderSection can enter a NodePara.
388 (_, NodePara) | isAdjacent -> collapse
389 | otherwise -> collapse2
390 _ -> collapse
391 where
392 isAdjacent = pos_line bn - pos_line eo <= 1
393 para = Tree (Cell bn en NodePara) (return new) : rows
394 collapse = Tree (Cell bo en o) (os |> new) : olds
395 collapse2 = collapseRoot new $ collapseRoot old olds