]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Tree.hs
Add PairAt, TokenAt and PlainAt.
[doclang.git] / Hdoc / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Hdoc.TCT.Tree
5 ( module Hdoc.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ordering(..), Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence ((|>))
22 import Data.TreeSeq.Strict (Tree(..), Trees)
23 import Prelude (undefined, Num(..))
24 import System.FilePath (FilePath)
25 import Text.Show (Show(..))
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29
30 import Hdoc.TCT.Utils
31 import Hdoc.TCT.Cell
32 import Hdoc.TCT.Elem
33 import Hdoc.TCT.Debug
34
35 -- * Type 'Root'
36 -- | A single 'Tree' to gather all the 'Node's
37 -- simplifies greatly the navigation and transformations,
38 -- especially because the later XML or DTC output
39 -- are themselves a single tree-like data structure.
40 --
41 -- Also, having a single 'Tree' is easier to merge
42 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
43 -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
44 --
45 -- For error reporting, indentation sensitivity and paragraph grouping,
46 -- each 'Node' is annotated with a 'Cell'
47 -- spanning over all its content (sub-'Trees' included).
48 type Root = Tree (Cell Node)
49 type Roots = Trees (Cell Node)
50
51 pattern Tree0 :: a -> Tree a
52 pattern Tree0 a <- Tree a (null -> True)
53 where Tree0 a = Tree a mempty
54
55 -- * Type 'Node'
56 data Node
57 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
58 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
59 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
60 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
61 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
62 | NodePara -- ^ node, gather trees by paragraph,
63 -- useful to know when to generate a <para> XML node
64 deriving (Eq,Show)
65 instance Pretty Node
66
67 -- * Type 'Header'
68 data Header
69 = HeaderColon !Name !White -- ^ @name: @
70 | HeaderEqual !Name !White -- ^ @name=@
71 | HeaderBar !Name !White -- ^ @name|@
72 | HeaderGreat !Name !White -- ^ @name>@
73 | HeaderBrackets !Name -- ^ @[name]@
74 | HeaderDot !Name -- ^ @1. @
75 | HeaderDash -- ^ @- @
76 | HeaderDashDash -- ^ @-- @
77 | HeaderSection !LevelSection -- ^ @# @
78 | HeaderDotSlash !FilePath -- ^ @./file @
79 deriving (Eq, Ord, Show)
80 instance Pretty Header
81
82 -- ** Type 'Name'
83 type Name = TL.Text
84
85 -- ** Type 'LevelSection'
86 type LevelSection = Int
87
88 -- * Type 'Pair'
89 data Pair
90 = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
91 | PairTag Bool -- ^ @\#text#@ or @^\#text#@
92 | PairAt Bool -- ^ @\@text\@@ or @^\@text\@@
93 | PairStar -- ^ @*text*@
94 | PairSlash -- ^ @/text/@
95 | PairUnderscore -- ^ @_value_@
96 | PairDash -- ^ @-text-@
97 | PairBackquote -- ^ @`text`@
98 | PairSinglequote -- ^ @'text'@
99 | PairDoublequote -- ^ @"text"@
100 | PairFrenchquote -- ^ @«text»@
101 | PairParen -- ^ @(text)@
102 | PairBrace -- ^ @{text}@
103 | PairBracket -- ^ @[text]@
104 deriving (Eq,Ord,Show)
105 instance Pretty Pair
106
107 -- * Type 'Token'
108 data Token
109 = TokenText !TL.Text
110 | TokenEscape !Char -- ^ @\\char@
111 | TokenLink !Link
112 | TokenAt !Bool !Ref -- ^ @\@foo@ or @^\@foo@
113 | TokenTag !Bool !Ref -- ^ @\#foo@ or @^\#foo@
114 deriving (Eq,Show)
115
116 -- ** Type 'Link'
117 type Link = TL.Text
118
119 -- ** Type 'Ref'
120 type Ref = TL.Text
121
122 -- * Type 'Row'
123 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
124 type Row = [Root]
125
126 -- ** Type 'Rows'
127 -- | In reverse order: a list of nodes in scope
128 -- (hence to which the next line can append to).
129 type Rows = [Root]
130
131 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
132 -- one can always put the last 'Root' as a child to a previous one.
133 -- This 'Root' just has to be discarded by 'collapseRows'.
134 initRows :: Rows
135 initRows = [Tree0 $ Cell (Span "" p p :| []) $ NodeHeader HeaderDash]
136 where p = Pos{pos_line= -1, pos_column=0}
137 -- NOTE: such that any following 'Root'
138 -- is 'NodePara' if possible, and always a child.
139
140 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
141 --
142 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
143 -- they MAY span over multilines, and they can be many from a single line.
144 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
145 -- they MUST span only over a single and entire line.
146 --
147 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
148 mergeRow :: Rows -> Row -> Rows
149 mergeRow rows row =
150 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
151 mergeRowPrefix 0 rows $ List.reverse row
152
153 -- | Merge by considering matching prefixes.
154 --
155 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
156 -- but on their vertical alignment as prefixes.
157 -- Hence, each new 'Row' has those prefixes zipped into a single one
158 -- when they match, are aligned and adjacent.
159 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
160 mergeRowPrefix col rows row =
161 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
162 case (row,rows) of
163 ([], _) -> rows
164 (_, []) -> undefined -- NOTE: cannot happen with initRows
165 ( _new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news
166 , _old@(Tree (Cell (Span _fo _bo eo:|_so) _o) _os):_olds ) ->
167 case collapseRowsWhile isCollapsable rows of
168 [] -> mergeRowIndent rows row
169 head@(unTree -> ch@(Cell (Span _fh bh _eh:|_sh) h)) : olds' ->
170 case (n,h) of
171 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
172 -- then check if there is a matching HeaderGreat,
173 -- if so, discard new and restart with a col advanced to new's beginning
174 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
175 | isAdjacent && isMatching ch -> discard
176 -- NOTE: same for HeaderBar
177 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
178 | isAdjacent && isMatching ch -> discard
179 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
180 -- makes it collapse entirely
181 (_, NodeHeader HeaderGreat{})
182 | col < pos_column bh -> collapse
183 -- NOTE: same for HeaderBar
184 (_, NodeHeader HeaderBar{})
185 | col < pos_column bh -> collapse
186 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
187 where
188 isAdjacent = pos_line bn - pos_line eo <= 1
189 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
190 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
191 where
192 isMatching (Cell (Span _fh bh _eh:|_sh) h) =
193 pos_column bn == pos_column bh &&
194 n == h
195 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
196 \_t0@(unTree -> c0@(Cell (Span _f0 b0 _e0:|_s0) _n0))
197 _t1@(unTree -> Cell (Span _f1 b1 e1:|_s1) _n1) ->
198 not (isMatching c0) &&
199 (pos_line b0 - pos_line e1 <= 1) && -- adjacent
200 col < pos_column b1 -- righter than col
201
202 -- | Merge by considering indentation.
203 mergeRowIndent :: Rows -> Row -> Rows
204 mergeRowIndent rows row =
205 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
206 case (row,rows) of
207 ([], _) -> rows
208 (_, []) -> undefined -- NOTE: cannot happen with initRows
209 ( new@(Tree (Cell ssn@(Span fn bn en:|sn) n) ns):news
210 ,old@(Tree (Cell sso@(Span fo bo eo:|so) o) os):olds ) ->
211 case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
212 debug0 "mergeRowIndent/colOld" (pos_column bo) of
213 -- NOTE: new is on the left
214 LT ->
215 case (n,o) of
216 -- NOTE: merge adjacent NodeText
217 -- first
218 -- second
219 (NodeText tn, NodeText to)
220 | TL.null tn || TL.null to
221 , not isVerbatim -> collapse
222 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
223 where
224 t = NodeText <$> Cell (Span fo boNew eo:|so) (indent<>to) <> Cell ssn tn
225 boNew = bo{pos_column=pos_column bn}
226 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
227 -- | Whether the horizontal delta is made of spaces
228 isIndented =
229 debug0 "mergeRowIndent/isIndented" $
230 case olds of
231 [] -> True
232 (unTree -> (cell_location -> (span_end -> ep) :| _)) : _ ->
233 case pos_line ep `compare` pos_line bo of
234 LT -> True
235 EQ -> pos_column ep <= pos_column bn
236 _ -> False
237 _ -> collapse
238 -- NOTE: new is vertically aligned
239 EQ ->
240 case (n,o) of
241 -- NOTE: preserve all NodeText "", but still split into two NodePara
242 (NodeText tn, NodeText to)
243 | TL.null tn || TL.null to
244 , not isVerbatim -> collapse
245 | isAdjacent -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns)
246 -- NOTE: HeaderSection can parent Nodes at the same level
247 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
248 if debug0 "mergeRowIndent/lvlNew" lvlNew
249 > debug0 "mergeRowIndent/lvlOld" lvlOld
250 -- # old
251 -- ## new
252 then concat
253 -- ## old or # old
254 -- # new # new
255 else collapse
256 -- NOTE: old is no HeaderSection, then collapse to any older and loop
257 (NodeHeader HeaderSection{}, _)
258 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
259 , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
260 mergeRowIndent rows' row
261 where
262 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
263 \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_ss0) n0) _t1 ->
264 case n0 of
265 NodeHeader HeaderSection{} -> False
266 _ -> pos_column bn == pos_column b0
267 -- NOTE: in case of alignment, HeaderSection is parent
268 (_, NodeHeader HeaderSection{}) -> concat
269 --
270 _ -> replace
271 -- NOTE: new is on the right
272 GT ->
273 case (n,o) of
274 -- NOTE: keep NodeText "" out of old NodePara
275 (NodeText "", NodePara) -> collapse
276 -- NOTE: merge adjacent NodeText
277 (NodeText tn, NodeText to) ->
278 case isAdjacent of
279 _ | TL.null tn || TL.null to
280 , not isVerbatim -> collapse
281 -- old
282 -- new
283 True -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns)
284 -- old
285 --
286 -- new
287 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
288 where
289 shifted = Tree (Cell (Span fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns)
290 bnNew = bn{pos_column=pos_column bo}
291 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
292 --
293 _ -> concat
294 where
295 isAdjacent = pos_line bn - pos_line eo <= 1
296 -- | Whether a parent semantic want new to stay a NodeText
297 isVerbatim = any p rows
298 where
299 p (unTree -> (unCell -> NodeHeader HeaderBar{})) = True
300 p _ = False
301 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
302 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
303 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
304 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
305
306 -- | Like 'mergeRowIndent', but without maintaining the appending,
307 -- hence collapsing all the 'Root's of the given 'Rows'.
308 --
309 -- NOTE: 'initRows' MUST have been the first 'Rows'
310 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
311 collapseRows :: Rows -> Roots
312 collapseRows rows =
313 debug1_ "collapseRows" ("rows",rows) $
314 case collapseRowsWhile (\_new _old -> True) rows of
315 [t] -> subTrees t
316 _ -> undefined
317 -- NOTE: subTrees returns the children of the updated initRows
318
319 -- | Collapse downto any last HeaderSection, returning it and its level.
320 collapseSection :: ColNum -> Rows -> Rows
321 collapseSection col = debug1 "collapseSection" "rows" go
322 where
323 go rows@(new@(unTree -> Cell (Span _fn bn _en:|_sn) n):olds)
324 | col <= pos_column bn =
325 case n of
326 NodeHeader HeaderSection{} -> rows
327 _ -> collapseSection col $ collapseRoot new $ go olds
328 go _ = mempty
329
330 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
331 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
332 [] -> mempty
333 rows@(new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news) ->
334 case news of
335 [] -> rows
336 old@(Tree (Cell (Span _fo bo eo:|_so) o) _os):olds
337 | not $ test new old -> rows
338 | otherwise ->
339 case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare`
340 debug0 "collapseRowsWhile/colOld" (pos_column bo) of
341 -- NOTE: new is vertically aligned
342 EQ ->
343 case (n,o) of
344 -- NOTE: HeaderSection can parent Nodes at the same level
345 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
346 if debug0 "collapseRowsWhile/lvlNew" lvlNew
347 > debug0 "collapseRowsWhile/lvlOld" lvlOld
348 -- # old
349 -- ## new
350 then collapse
351 -- ## old or # old
352 -- # new # new
353 else
354 debug "collapseRowsWhile/replace" $
355 collapseRowsWhile test $ (new:) $ collapseRoot old olds
356 -- NOTE: old is no HeaderSection, then collapse to any older and loop
357 (NodeHeader HeaderSection{}, _)
358 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
359 , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
360 collapseRowsWhile test news'
361 where
362 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
363 \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_s0) n0) _t1 ->
364 case n0 of
365 NodeHeader HeaderSection{} -> False
366 _ -> pos_column bn == pos_column b0
367 -- NOTE: in case of alignment, HeaderSection is parent
368 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
369 -- NOTE: merge within old NodePara.
370 (_, NodePara) | isAdjacent -> collapse
371 --
372 _ -> collapse2
373 -- NOTE: new is either on the left or on the right
374 _ -> collapse
375 where
376 isAdjacent = pos_line bn - pos_line eo <= 1
377 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
378 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
379
380 -- | Put a 'Root' as a child of the head 'Root'.
381 --
382 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
383 --
384 -- NOTE: any NodeText/NodeText merging must have been done before.
385 collapseRoot :: Root -> Rows -> Rows
386 collapseRoot new@(Tree (Cell ssn@(Span _fn bn en:|_sn) n) _ns) rows =
387 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
388 case rows of
389 [] -> return new
390 old@(Tree (Cell (Span fo bo eo:|so) o) os) : olds ->
391 case (n,o) of
392 -- NOTE: no child into NodeText
393 (_, NodeText{}) -> collapse2
394 -- NOTE: NodeText can begin a NodePara
395 (NodeText tn, _) | not $ TL.null tn ->
396 case o of
397 -- NOTE: no NodePara within those
398 NodeHeader HeaderEqual{} -> collapse
399 NodeHeader HeaderBar{} -> collapse
400 NodeHeader HeaderDashDash{} -> collapse
401 -- NOTE: NodePara within those
402 NodePara | not isAdjacent -> para
403 NodeHeader{} -> para
404 _ -> collapse
405 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
406 -- Note that since a NodePara is never adjacent to another,
407 -- it is not nested within another.
408 (_, NodePara)
409 | isAdjacent ->
410 case n of
411 -- NOTE: no HeaderSection (even adjacent) within a NodePara
412 NodeHeader HeaderSection{} -> collapse2
413 _ -> collapse
414 | otherwise -> collapse2
415 _ -> collapse
416 where
417 isAdjacent = pos_line bn - pos_line eo <= 1
418 para = Tree (Cell ssn NodePara) (return new) : rows
419 collapse = Tree (Cell (Span fo bo en:|so) o) (os |> new) : olds
420 collapse2 = collapseRoot new $ collapseRoot old olds