]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Tree.hs
Update to megaparsec-7 and new symantic-xml
[doclang.git] / Hdoc / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Hdoc.TCT.Tree
5 ( module Hdoc.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Maybe (Maybe(..))
19 import Data.Monoid (Monoid(..))
20 import Data.Ord (Ordering(..), Ord(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.TreeSeq.Strict (Tree(..), Trees)
24 import Prelude (undefined, Num(..), Bounded(..))
25 import System.FilePath (FilePath)
26 import Text.Show (Show(..))
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30 import qualified Text.Megaparsec as P
31
32 import Hdoc.TCT.Utils
33 import Hdoc.TCT.Cell
34 import Hdoc.TCT.Elem
35 import Hdoc.TCT.Debug
36
37 -- * Type 'Root'
38 -- | A single 'Tree' to gather all the 'Node's
39 -- simplifies greatly the navigation and transformations,
40 -- especially because the later XML or DTC output
41 -- are themselves a single tree-like data structure.
42 --
43 -- Also, having a single 'Tree' is easier to merge
44 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' ('Just' "li") ""))@),
45 -- and XML coming from the second parsing phase (eg. @'NodePair' ('PairElem' ('Just' "li") [])@).
46 --
47 -- For error reporting, indentation sensitivity and paragraph grouping,
48 -- each 'Node' is annotated with a 'Cell'
49 -- spanning over all its content (sub-'Trees' included).
50 type Root = Tree (Cell Node)
51 type Roots = Trees (Cell Node)
52
53 pattern Tree0 :: a -> Tree a
54 pattern Tree0 a <- Tree a (null -> True)
55 where Tree0 a = Tree a mempty
56
57 -- * Type 'Node'
58 data Node
59 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
60 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
61 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
62 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
63 | NodeLower !ElemName !ElemAttrs -- ^ node, @<name a=b@
64 | NodePara -- ^ node, gather trees by paragraph,
65 -- useful to know when to generate a <para> XML node
66 deriving (Eq,Show)
67 instance Pretty Node
68
69 -- * Type 'Header'
70 data Header
71 = HeaderColon !(Maybe ElemName) !White -- ^ @name: @
72 | HeaderEqual !ElemName !White -- ^ @name=@
73 | HeaderBar !(Maybe ElemName) !White -- ^ @name|@
74 | HeaderGreat !(Maybe ElemName) !White -- ^ @name>@
75 | HeaderBrackets !TL.Text -- ^ @[name]@
76 | HeaderDot !TL.Text -- ^ @1. @
77 | HeaderDash -- ^ @- @
78 | HeaderDashDash -- ^ @-- @
79 | HeaderSection !LevelSection -- ^ @# @
80 | HeaderDotSlash !FilePath -- ^ @./file @
81 deriving (Eq, Ord, Show)
82 instance Pretty Header
83
84 {-
85 -- ** Type 'Name'
86 type Name = TL.Text
87 -}
88
89 -- ** Type 'LevelSection'
90 type LevelSection = Int
91
92 -- * Type 'Pair'
93 data Pair
94 = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
95 | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@
96 | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@
97 | PairStar -- ^ @*text*@
98 | PairSlash -- ^ @/text/@
99 | PairUnderscore -- ^ @_value_@
100 | PairDash -- ^ @-text-@
101 | PairBackquote -- ^ @`text`@
102 | PairSinglequote -- ^ @'text'@
103 | PairDoublequote -- ^ @"text"@
104 | PairFrenchquote -- ^ @«text»@
105 | PairParen -- ^ @(text)@
106 | PairBrace -- ^ @{text}@
107 | PairBracket -- ^ @[text]@
108 deriving (Eq,Ord,Show)
109 instance Pretty Pair
110
111 -- * Type 'Token'
112 data Token
113 = TokenText !TL.Text
114 | TokenEscape !Char -- ^ @\\char@
115 | TokenLink !Link
116 | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@
117 | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@
118 deriving (Eq,Show)
119
120 {-
121 -- * Type 'Filter'
122 data Filter
123 = FilterAnd !Filter !Filter
124 | FilterOr !Filter !Filter
125 | FilterNot !Filter
126 | FilterTag !Ref
127 | FilterAt !Ref
128 deriving (Eq,Ord,Show)
129 -}
130
131 -- ** Type 'Link'
132 type Link = TL.Text
133
134 -- ** Type 'Ref'
135 type Ref = TL.Text
136
137 -- * Type 'Row'
138 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
139 type Row = [Root]
140
141 -- ** Type 'Rows'
142 -- | In reverse order: a list of nodes in scope
143 -- (hence to which the next line can append to).
144 type Rows = [Root]
145
146 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
147 -- one can always put the last 'Root' as a child to a previous one.
148 -- This 'Root' just has to be discarded by 'collapseRows'.
149 initRows :: Rows
150 initRows = [Tree0 $ Sourced (FileRange "" p p :| []) $ NodeHeader HeaderDash]
151 where
152 p = LineColumn{lineNum=numM1, colNum=num0}
153 -- NOTE: lineNum= -1 is a hack so that any following 'Root'
154 -- becomes a 'NodePara' if possible, and always a child.
155 numM1 = numMax<>numMax<>P.pos1
156 num0 = numM1<>P.pos1
157 numMax = P.mkPos maxBound
158
159 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
160 --
161 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
162 -- they MAY span over multilines, and they can be many from a single line.
163 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
164 -- they MUST span only over a single and entire line.
165 --
166 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
167 mergeRow :: Rows -> Row -> Rows
168 mergeRow rows row =
169 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
170 mergeRowPrefix 0 rows $ List.reverse row
171
172 -- | Merge by considering matching prefixes.
173 --
174 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
175 -- but on their vertical alignment as prefixes.
176 -- Hence, each new 'Row' has those prefixes zipped into a single one
177 -- when they match, are aligned and adjacent.
178 mergeRowPrefix :: ColInt -> Rows -> Row -> Rows
179 mergeRowPrefix col rows row =
180 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
181 case (row,rows) of
182 ([], _) -> rows
183 (_, []) -> undefined -- NOTE: cannot happen with initRows
184 ( _new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news
185 , _old@(Tree (Sourced (FileRange _fo _bo eo:|_so) _o) _os):_olds ) ->
186 case collapseRowsWhile isCollapsable rows of
187 [] -> mergeRowIndent rows row
188 head@(unTree -> ch@(Sourced (FileRange _fh bh _eh:|_sh) h)) : olds' ->
189 case (n,h) of
190 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
191 -- then check if there is a matching HeaderGreat,
192 -- if so, discard new and restart with a col advanced to new's beginning
193 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
194 | isAdjacent && isMatching ch -> discard
195 -- NOTE: same for HeaderBar
196 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
197 | isAdjacent && isMatching ch -> discard
198 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
199 -- makes it collapse entirely
200 (_, NodeHeader HeaderGreat{})
201 | col < colInt bh -> collapse
202 -- NOTE: same for HeaderBar
203 (_, NodeHeader HeaderBar{})
204 | col < colInt bh -> collapse
205 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
206 where
207 isAdjacent = lineInt bn - lineInt eo <= 1
208 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (colInt bh) rows news
209 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
210 where
211 isMatching (Sourced (FileRange _fh bh _eh:|_sh) h) =
212 colInt bn == colInt bh &&
213 n == h
214 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
215 \_t0@(unTree -> c0@(Sourced (FileRange _f0 b0 _e0:|_s0) _n0))
216 _t1@(unTree -> Sourced (FileRange _f1 b1 e1:|_s1) _n1) ->
217 not (isMatching c0) &&
218 (lineInt b0 - lineInt e1 <= 1) && -- adjacent
219 col < colInt b1 -- righter than col
220
221 -- | Merge by considering indentation.
222 mergeRowIndent :: Rows -> Row -> Rows
223 mergeRowIndent rows row =
224 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
225 case (row,rows) of
226 ([], _) -> rows
227 (_, []) -> undefined -- NOTE: cannot happen with initRows
228 ( new@(Tree (Sourced ssn@(FileRange fn bn en:|sn) n) ns):news
229 ,old@(Tree (Sourced sso@(FileRange fo bo eo:|so) o) os):olds ) ->
230 case debug0 "mergeRowIndent/colNew" (colInt bn) `compare`
231 debug0 "mergeRowIndent/colOld" (colInt bo) of
232 -- NOTE: new is on the left
233 LT ->
234 case (n,o) of
235 -- NOTE: merge adjacent NodeText
236 -- first
237 -- second
238 (NodeText tn, NodeText to)
239 | TL.null tn || TL.null to
240 , not isVerbatim -> collapse
241 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
242 where
243 t = NodeText <$> Sourced (FileRange fo boNew eo:|so) (indent<>to) <> Sourced ssn tn
244 boNew = bo{colNum=colNum bn}
245 indent = TL.replicate (int64 $ colInt bo - colInt bn) " "
246 -- | Whether the horizontal delta is made of spaces
247 isIndented =
248 debug0 "mergeRowIndent/isIndented" $
249 case olds of
250 [] -> True
251 (unTree -> (source -> (fileRange_end -> ep) :| _)) : _ ->
252 case lineInt ep `compare` lineInt bo of
253 LT -> True
254 EQ -> colInt ep <= colInt bn
255 _ -> False
256 _ -> collapse
257 -- NOTE: new is vertically aligned
258 EQ ->
259 case (n,o) of
260 -- NOTE: preserve all NodeText "", but still split into two NodePara
261 (NodeText tn, NodeText to)
262 | TL.null tn || TL.null to
263 , not isVerbatim -> collapse
264 | isAdjacent -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns)
265 -- NOTE: HeaderSection can parent Nodes at the same level
266 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
267 if debug0 "mergeRowIndent/lvlNew" lvlNew
268 > debug0 "mergeRowIndent/lvlOld" lvlOld
269 -- # old
270 -- ## new
271 then concat
272 -- ## old or # old
273 -- # new # new
274 else collapse
275 -- NOTE: old is no HeaderSection, then collapse to any older and loop
276 (NodeHeader HeaderSection{}, _)
277 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
278 , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec ->
279 mergeRowIndent rows' row
280 where
281 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
282 \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_ss0) n0) _t1 ->
283 case n0 of
284 NodeHeader HeaderSection{} -> False
285 _ -> colInt bn == colInt b0
286 -- NOTE: in case of alignment, HeaderSection is parent
287 (_, NodeHeader HeaderSection{}) -> concat
288 --
289 _ -> replace
290 -- NOTE: new is on the right
291 GT ->
292 case (n,o) of
293 -- NOTE: keep NodeText "" out of old NodePara
294 (NodeText "", NodePara) -> collapse
295 -- NOTE: merge adjacent NodeText
296 (NodeText tn, NodeText to) ->
297 case isAdjacent of
298 _ | TL.null tn || TL.null to
299 , not isVerbatim -> collapse
300 -- old
301 -- new
302 True -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns)
303 -- old
304 --
305 -- new
306 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
307 where
308 shifted = Tree (Sourced (FileRange fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns)
309 bnNew = bn{colNum=colNum bo}
310 indent = TL.replicate (int64 $ colInt bn - colInt bo) " "
311 --
312 _ -> concat
313 where
314 isAdjacent = lineInt bn - lineInt eo <= 1
315 -- | Whether a parent semantic want new to stay a NodeText
316 isVerbatim = any p rows
317 where
318 p (unTree -> (unSourced -> NodeHeader HeaderBar{})) = True
319 p _ = False
320 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
321 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
322 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
323 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
324
325 -- | Like 'mergeRowIndent', but without maintaining the appending,
326 -- hence collapsing all the 'Root's of the given 'Rows'.
327 --
328 -- NOTE: 'initRows' MUST have been the first 'Rows'
329 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
330 collapseRows :: Rows -> Roots
331 collapseRows rows =
332 debug1_ "collapseRows" ("rows",rows) $
333 case collapseRowsWhile (\_new _old -> True) rows of
334 [t] -> subTrees t
335 _ -> undefined
336 -- NOTE: subTrees returns the children of the updated initRows
337
338 -- | Collapse downto any last HeaderSection, returning it and its level.
339 collapseSection :: ColInt -> Rows -> Rows
340 collapseSection col = debug1 "collapseSection" "rows" go
341 where
342 go rows@(new@(unTree -> Sourced (FileRange _fn bn _en:|_sn) n):olds)
343 | col <= colInt bn =
344 case n of
345 NodeHeader HeaderSection{} -> rows
346 _ -> collapseSection col $ collapseRoot new $ go olds
347 go _ = mempty
348
349 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
350 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
351 [] -> mempty
352 rows@(new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news) ->
353 case news of
354 [] -> rows
355 old@(Tree (Sourced (FileRange _fo bo eo:|_so) o) _os):olds
356 | not $ test new old -> rows
357 | otherwise ->
358 case debug0 "collapseRowsWhile/colNew" (colInt bn) `compare`
359 debug0 "collapseRowsWhile/colOld" (colInt bo) of
360 -- NOTE: new is vertically aligned
361 EQ ->
362 case (n,o) of
363 -- NOTE: HeaderSection can parent Nodes at the same level
364 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
365 if debug0 "collapseRowsWhile/lvlNew" lvlNew
366 > debug0 "collapseRowsWhile/lvlOld" lvlOld
367 -- # old
368 -- ## new
369 then collapse
370 -- ## old or # old
371 -- # new # new
372 else
373 debug "collapseRowsWhile/replace" $
374 collapseRowsWhile test $ (new:) $ collapseRoot old olds
375 -- NOTE: old is no HeaderSection, then collapse to any older and loop
376 (NodeHeader HeaderSection{}, _)
377 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
378 , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec ->
379 collapseRowsWhile test news'
380 where
381 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
382 \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_s0) n0) _t1 ->
383 case n0 of
384 NodeHeader HeaderSection{} -> False
385 _ -> colInt bn == colInt b0
386 -- NOTE: in case of alignment, HeaderSection is parent
387 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
388 -- NOTE: merge within old NodePara.
389 (_, NodePara) | isAdjacent -> collapse
390 --
391 _ -> collapse2
392 -- NOTE: new is either on the left or on the right
393 _ -> collapse
394 where
395 isAdjacent = lineInt bn - lineInt eo <= 1
396 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
397 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
398
399 -- | Put a 'Root' as a child of the head 'Root'.
400 --
401 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
402 --
403 -- NOTE: any NodeText/NodeText merging must have been done before.
404 collapseRoot :: Root -> Rows -> Rows
405 collapseRoot new@(Tree (Sourced ssn@(FileRange _fn bn en:|_sn) n) _ns) rows =
406 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
407 case rows of
408 [] -> return new
409 old@(Tree (Sourced (FileRange fo bo eo:|so) o) os) : olds ->
410 case (n,o) of
411 -- NOTE: no child into NodeText
412 (_, NodeText{}) -> collapse2
413 -- NOTE: NodeText can begin a NodePara
414 (NodeText tn, _) | not $ TL.null tn ->
415 case o of
416 -- NOTE: no NodePara within those
417 NodeHeader HeaderEqual{} -> collapse
418 NodeHeader HeaderBar{} -> collapse
419 NodeHeader HeaderDashDash{} -> collapse
420 -- NOTE: NodePara within those
421 NodePara | not isAdjacent -> para
422 NodeHeader{} -> para
423 _ -> collapse
424 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
425 -- Note that since a NodePara is never adjacent to another,
426 -- it is not nested within another.
427 (_, NodePara)
428 | isAdjacent ->
429 case n of
430 -- NOTE: no HeaderSection (even adjacent) within a NodePara
431 NodeHeader HeaderSection{} -> collapse2
432 _ -> collapse
433 | otherwise -> collapse2
434 _ -> collapse
435 where
436 isAdjacent = lineInt bn - lineInt eo <= 1
437 para = Tree (Sourced ssn NodePara) (return new) : rows
438 collapse = Tree (Sourced (FileRange fo bo en:|so) o) (os |> new) : olds
439 collapse2 = collapseRoot new $ collapseRoot old olds