]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Tree.hs
vim: polish code
[doclang.git] / Hdoc / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Hdoc.TCT.Tree
5 ( module Hdoc.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ordering(..), Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence ((|>))
22 import Data.TreeSeq.Strict (Tree(..), Trees)
23 import Prelude (undefined, Num(..))
24 import System.FilePath (FilePath)
25 import Text.Show (Show(..))
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29
30 import Hdoc.TCT.Utils
31 import Hdoc.TCT.Cell
32 import Hdoc.TCT.Elem
33 import Hdoc.TCT.Debug
34
35 -- * Type 'Root'
36 -- | A single 'Tree' to gather all the 'Node's
37 -- simplifies greatly the navigation and transformations,
38 -- especially because the later XML or DTC output
39 -- are themselves a single tree-like data structure.
40 --
41 -- Also, having a single 'Tree' is easier to merge
42 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
43 -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
44 --
45 -- For error reporting, indentation sensitivity and paragraph grouping,
46 -- each 'Node' is annotated with a 'Cell'
47 -- spanning over all its content (sub-'Trees' included).
48 type Root = Tree (Cell Node)
49 type Roots = Trees (Cell Node)
50
51 pattern Tree0 :: a -> Tree a
52 pattern Tree0 a <- Tree a (null -> True)
53 where Tree0 a = Tree a mempty
54
55 -- * Type 'Node'
56 data Node
57 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
58 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
59 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
60 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
61 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
62 | NodePara -- ^ node, gather trees by paragraph,
63 -- useful to know when to generate a <para> XML node
64 deriving (Eq,Show)
65 instance Pretty Node
66
67 -- * Type 'Header'
68 data Header
69 = HeaderColon !Name !White -- ^ @name: @
70 | HeaderEqual !Name !White -- ^ @name=@
71 | HeaderBar !Name !White -- ^ @name|@
72 | HeaderGreat !Name !White -- ^ @name>@
73 | HeaderBrackets !Name -- ^ @[name]@
74 | HeaderDot !Name -- ^ @1. @
75 | HeaderDash -- ^ @- @
76 | HeaderDashDash -- ^ @-- @
77 | HeaderSection !LevelSection -- ^ @# @
78 | HeaderDotSlash !FilePath -- ^ @./file @
79 deriving (Eq, Ord, Show)
80 instance Pretty Header
81
82 -- ** Type 'Name'
83 type Name = TL.Text
84
85 -- ** Type 'LevelSection'
86 type LevelSection = Int
87
88 -- * Type 'Pair'
89 data Pair
90 = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
91 | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@
92 | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@
93 | PairStar -- ^ @*text*@
94 | PairSlash -- ^ @/text/@
95 | PairUnderscore -- ^ @_value_@
96 | PairDash -- ^ @-text-@
97 | PairBackquote -- ^ @`text`@
98 | PairSinglequote -- ^ @'text'@
99 | PairDoublequote -- ^ @"text"@
100 | PairFrenchquote -- ^ @«text»@
101 | PairParen -- ^ @(text)@
102 | PairBrace -- ^ @{text}@
103 | PairBracket -- ^ @[text]@
104 deriving (Eq,Ord,Show)
105 instance Pretty Pair
106
107 -- * Type 'Token'
108 data Token
109 = TokenText !TL.Text
110 | TokenEscape !Char -- ^ @\\char@
111 | TokenLink !Link
112 | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@
113 | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@
114 deriving (Eq,Show)
115
116 {-
117 -- * Type 'Filter'
118 data Filter
119 = FilterAnd !Filter !Filter
120 | FilterOr !Filter !Filter
121 | FilterNot !Filter
122 | FilterTag !Ref
123 | FilterAt !Ref
124 deriving (Eq,Ord,Show)
125 -}
126
127 -- ** Type 'Link'
128 type Link = TL.Text
129
130 -- ** Type 'Ref'
131 type Ref = TL.Text
132
133 -- * Type 'Row'
134 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
135 type Row = [Root]
136
137 -- ** Type 'Rows'
138 -- | In reverse order: a list of nodes in scope
139 -- (hence to which the next line can append to).
140 type Rows = [Root]
141
142 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
143 -- one can always put the last 'Root' as a child to a previous one.
144 -- This 'Root' just has to be discarded by 'collapseRows'.
145 initRows :: Rows
146 initRows = [Tree0 $ Cell (Span "" p p :| []) $ NodeHeader HeaderDash]
147 where p = Pos{pos_line= -1, pos_column=0}
148 -- NOTE: such that any following 'Root'
149 -- is 'NodePara' if possible, and always a child.
150
151 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
152 --
153 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
154 -- they MAY span over multilines, and they can be many from a single line.
155 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
156 -- they MUST span only over a single and entire line.
157 --
158 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
159 mergeRow :: Rows -> Row -> Rows
160 mergeRow rows row =
161 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
162 mergeRowPrefix 0 rows $ List.reverse row
163
164 -- | Merge by considering matching prefixes.
165 --
166 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
167 -- but on their vertical alignment as prefixes.
168 -- Hence, each new 'Row' has those prefixes zipped into a single one
169 -- when they match, are aligned and adjacent.
170 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
171 mergeRowPrefix col rows row =
172 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
173 case (row,rows) of
174 ([], _) -> rows
175 (_, []) -> undefined -- NOTE: cannot happen with initRows
176 ( _new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news
177 , _old@(Tree (Cell (Span _fo _bo eo:|_so) _o) _os):_olds ) ->
178 case collapseRowsWhile isCollapsable rows of
179 [] -> mergeRowIndent rows row
180 head@(unTree -> ch@(Cell (Span _fh bh _eh:|_sh) h)) : olds' ->
181 case (n,h) of
182 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
183 -- then check if there is a matching HeaderGreat,
184 -- if so, discard new and restart with a col advanced to new's beginning
185 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
186 | isAdjacent && isMatching ch -> discard
187 -- NOTE: same for HeaderBar
188 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
189 | isAdjacent && isMatching ch -> discard
190 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
191 -- makes it collapse entirely
192 (_, NodeHeader HeaderGreat{})
193 | col < pos_column bh -> collapse
194 -- NOTE: same for HeaderBar
195 (_, NodeHeader HeaderBar{})
196 | col < pos_column bh -> collapse
197 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
198 where
199 isAdjacent = pos_line bn - pos_line eo <= 1
200 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
201 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
202 where
203 isMatching (Cell (Span _fh bh _eh:|_sh) h) =
204 pos_column bn == pos_column bh &&
205 n == h
206 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
207 \_t0@(unTree -> c0@(Cell (Span _f0 b0 _e0:|_s0) _n0))
208 _t1@(unTree -> Cell (Span _f1 b1 e1:|_s1) _n1) ->
209 not (isMatching c0) &&
210 (pos_line b0 - pos_line e1 <= 1) && -- adjacent
211 col < pos_column b1 -- righter than col
212
213 -- | Merge by considering indentation.
214 mergeRowIndent :: Rows -> Row -> Rows
215 mergeRowIndent rows row =
216 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
217 case (row,rows) of
218 ([], _) -> rows
219 (_, []) -> undefined -- NOTE: cannot happen with initRows
220 ( new@(Tree (Cell ssn@(Span fn bn en:|sn) n) ns):news
221 ,old@(Tree (Cell sso@(Span fo bo eo:|so) o) os):olds ) ->
222 case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
223 debug0 "mergeRowIndent/colOld" (pos_column bo) of
224 -- NOTE: new is on the left
225 LT ->
226 case (n,o) of
227 -- NOTE: merge adjacent NodeText
228 -- first
229 -- second
230 (NodeText tn, NodeText to)
231 | TL.null tn || TL.null to
232 , not isVerbatim -> collapse
233 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
234 where
235 t = NodeText <$> Cell (Span fo boNew eo:|so) (indent<>to) <> Cell ssn tn
236 boNew = bo{pos_column=pos_column bn}
237 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
238 -- | Whether the horizontal delta is made of spaces
239 isIndented =
240 debug0 "mergeRowIndent/isIndented" $
241 case olds of
242 [] -> True
243 (unTree -> (cell_location -> (span_end -> ep) :| _)) : _ ->
244 case pos_line ep `compare` pos_line bo of
245 LT -> True
246 EQ -> pos_column ep <= pos_column bn
247 _ -> False
248 _ -> collapse
249 -- NOTE: new is vertically aligned
250 EQ ->
251 case (n,o) of
252 -- NOTE: preserve all NodeText "", but still split into two NodePara
253 (NodeText tn, NodeText to)
254 | TL.null tn || TL.null to
255 , not isVerbatim -> collapse
256 | isAdjacent -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns)
257 -- NOTE: HeaderSection can parent Nodes at the same level
258 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
259 if debug0 "mergeRowIndent/lvlNew" lvlNew
260 > debug0 "mergeRowIndent/lvlOld" lvlOld
261 -- # old
262 -- ## new
263 then concat
264 -- ## old or # old
265 -- # new # new
266 else collapse
267 -- NOTE: old is no HeaderSection, then collapse to any older and loop
268 (NodeHeader HeaderSection{}, _)
269 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
270 , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
271 mergeRowIndent rows' row
272 where
273 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
274 \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_ss0) n0) _t1 ->
275 case n0 of
276 NodeHeader HeaderSection{} -> False
277 _ -> pos_column bn == pos_column b0
278 -- NOTE: in case of alignment, HeaderSection is parent
279 (_, NodeHeader HeaderSection{}) -> concat
280 --
281 _ -> replace
282 -- NOTE: new is on the right
283 GT ->
284 case (n,o) of
285 -- NOTE: keep NodeText "" out of old NodePara
286 (NodeText "", NodePara) -> collapse
287 -- NOTE: merge adjacent NodeText
288 (NodeText tn, NodeText to) ->
289 case isAdjacent of
290 _ | TL.null tn || TL.null to
291 , not isVerbatim -> collapse
292 -- old
293 -- new
294 True -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns)
295 -- old
296 --
297 -- new
298 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
299 where
300 shifted = Tree (Cell (Span fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns)
301 bnNew = bn{pos_column=pos_column bo}
302 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
303 --
304 _ -> concat
305 where
306 isAdjacent = pos_line bn - pos_line eo <= 1
307 -- | Whether a parent semantic want new to stay a NodeText
308 isVerbatim = any p rows
309 where
310 p (unTree -> (unCell -> NodeHeader HeaderBar{})) = True
311 p _ = False
312 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
313 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
314 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
315 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
316
317 -- | Like 'mergeRowIndent', but without maintaining the appending,
318 -- hence collapsing all the 'Root's of the given 'Rows'.
319 --
320 -- NOTE: 'initRows' MUST have been the first 'Rows'
321 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
322 collapseRows :: Rows -> Roots
323 collapseRows rows =
324 debug1_ "collapseRows" ("rows",rows) $
325 case collapseRowsWhile (\_new _old -> True) rows of
326 [t] -> subTrees t
327 _ -> undefined
328 -- NOTE: subTrees returns the children of the updated initRows
329
330 -- | Collapse downto any last HeaderSection, returning it and its level.
331 collapseSection :: ColNum -> Rows -> Rows
332 collapseSection col = debug1 "collapseSection" "rows" go
333 where
334 go rows@(new@(unTree -> Cell (Span _fn bn _en:|_sn) n):olds)
335 | col <= pos_column bn =
336 case n of
337 NodeHeader HeaderSection{} -> rows
338 _ -> collapseSection col $ collapseRoot new $ go olds
339 go _ = mempty
340
341 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
342 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
343 [] -> mempty
344 rows@(new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news) ->
345 case news of
346 [] -> rows
347 old@(Tree (Cell (Span _fo bo eo:|_so) o) _os):olds
348 | not $ test new old -> rows
349 | otherwise ->
350 case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare`
351 debug0 "collapseRowsWhile/colOld" (pos_column bo) of
352 -- NOTE: new is vertically aligned
353 EQ ->
354 case (n,o) of
355 -- NOTE: HeaderSection can parent Nodes at the same level
356 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
357 if debug0 "collapseRowsWhile/lvlNew" lvlNew
358 > debug0 "collapseRowsWhile/lvlOld" lvlOld
359 -- # old
360 -- ## new
361 then collapse
362 -- ## old or # old
363 -- # new # new
364 else
365 debug "collapseRowsWhile/replace" $
366 collapseRowsWhile test $ (new:) $ collapseRoot old olds
367 -- NOTE: old is no HeaderSection, then collapse to any older and loop
368 (NodeHeader HeaderSection{}, _)
369 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
370 , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
371 collapseRowsWhile test news'
372 where
373 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
374 \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_s0) n0) _t1 ->
375 case n0 of
376 NodeHeader HeaderSection{} -> False
377 _ -> pos_column bn == pos_column b0
378 -- NOTE: in case of alignment, HeaderSection is parent
379 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
380 -- NOTE: merge within old NodePara.
381 (_, NodePara) | isAdjacent -> collapse
382 --
383 _ -> collapse2
384 -- NOTE: new is either on the left or on the right
385 _ -> collapse
386 where
387 isAdjacent = pos_line bn - pos_line eo <= 1
388 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
389 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
390
391 -- | Put a 'Root' as a child of the head 'Root'.
392 --
393 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
394 --
395 -- NOTE: any NodeText/NodeText merging must have been done before.
396 collapseRoot :: Root -> Rows -> Rows
397 collapseRoot new@(Tree (Cell ssn@(Span _fn bn en:|_sn) n) _ns) rows =
398 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
399 case rows of
400 [] -> return new
401 old@(Tree (Cell (Span fo bo eo:|so) o) os) : olds ->
402 case (n,o) of
403 -- NOTE: no child into NodeText
404 (_, NodeText{}) -> collapse2
405 -- NOTE: NodeText can begin a NodePara
406 (NodeText tn, _) | not $ TL.null tn ->
407 case o of
408 -- NOTE: no NodePara within those
409 NodeHeader HeaderEqual{} -> collapse
410 NodeHeader HeaderBar{} -> collapse
411 NodeHeader HeaderDashDash{} -> collapse
412 -- NOTE: NodePara within those
413 NodePara | not isAdjacent -> para
414 NodeHeader{} -> para
415 _ -> collapse
416 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
417 -- Note that since a NodePara is never adjacent to another,
418 -- it is not nested within another.
419 (_, NodePara)
420 | isAdjacent ->
421 case n of
422 -- NOTE: no HeaderSection (even adjacent) within a NodePara
423 NodeHeader HeaderSection{} -> collapse2
424 _ -> collapse
425 | otherwise -> collapse2
426 _ -> collapse
427 where
428 isAdjacent = pos_line bn - pos_line eo <= 1
429 para = Tree (Cell ssn NodePara) (return new) : rows
430 collapse = Tree (Cell (Span fo bo en:|so) o) (os |> new) : olds
431 collapse2 = collapseRoot new $ collapseRoot old olds