]> Git — Sourcephile - doclang.git/blob - Hdoc/TCT/Tree.hs
XML: use symantic-xml
[doclang.git] / Hdoc / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Hdoc.TCT.Tree
5 ( module Hdoc.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Maybe (Maybe(..))
19 import Data.Monoid (Monoid(..))
20 import Data.Ord (Ordering(..), Ord(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.TreeSeq.Strict (Tree(..), Trees)
24 import Prelude (undefined, Num(..))
25 import System.FilePath (FilePath)
26 import Text.Show (Show(..))
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
30
31 import Hdoc.TCT.Utils
32 import Hdoc.TCT.Cell
33 import Hdoc.TCT.Elem
34 import Hdoc.TCT.Debug
35
36 -- * Type 'Root'
37 -- | A single 'Tree' to gather all the 'Node's
38 -- simplifies greatly the navigation and transformations,
39 -- especially because the later XML or DTC output
40 -- are themselves a single tree-like data structure.
41 --
42 -- Also, having a single 'Tree' is easier to merge
43 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' ('Just' "li") ""))@),
44 -- and XML coming from the second parsing phase (eg. @'NodePair' ('PairElem' ('Just' "li") [])@).
45 --
46 -- For error reporting, indentation sensitivity and paragraph grouping,
47 -- each 'Node' is annotated with a 'Cell'
48 -- spanning over all its content (sub-'Trees' included).
49 type Root = Tree (Cell Node)
50 type Roots = Trees (Cell Node)
51
52 pattern Tree0 :: a -> Tree a
53 pattern Tree0 a <- Tree a (null -> True)
54 where Tree0 a = Tree a mempty
55
56 -- * Type 'Node'
57 data Node
58 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
59 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
60 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
61 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
62 | NodeLower !ElemName !ElemAttrs -- ^ node, @<name a=b@
63 | NodePara -- ^ node, gather trees by paragraph,
64 -- useful to know when to generate a <para> XML node
65 deriving (Eq,Show)
66 instance Pretty Node
67
68 -- * Type 'Header'
69 data Header
70 = HeaderColon !(Maybe ElemName) !White -- ^ @name: @
71 | HeaderEqual !ElemName !White -- ^ @name=@
72 | HeaderBar !(Maybe ElemName) !White -- ^ @name|@
73 | HeaderGreat !(Maybe ElemName) !White -- ^ @name>@
74 | HeaderBrackets !TL.Text -- ^ @[name]@
75 | HeaderDot !TL.Text -- ^ @1. @
76 | HeaderDash -- ^ @- @
77 | HeaderDashDash -- ^ @-- @
78 | HeaderSection !LevelSection -- ^ @# @
79 | HeaderDotSlash !FilePath -- ^ @./file @
80 deriving (Eq, Ord, Show)
81 instance Pretty Header
82
83 {-
84 -- ** Type 'Name'
85 type Name = TL.Text
86 -}
87
88 -- ** Type 'LevelSection'
89 type LevelSection = Int
90
91 -- * Type 'Pair'
92 data Pair
93 = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
94 | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@
95 | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@
96 | PairStar -- ^ @*text*@
97 | PairSlash -- ^ @/text/@
98 | PairUnderscore -- ^ @_value_@
99 | PairDash -- ^ @-text-@
100 | PairBackquote -- ^ @`text`@
101 | PairSinglequote -- ^ @'text'@
102 | PairDoublequote -- ^ @"text"@
103 | PairFrenchquote -- ^ @«text»@
104 | PairParen -- ^ @(text)@
105 | PairBrace -- ^ @{text}@
106 | PairBracket -- ^ @[text]@
107 deriving (Eq,Ord,Show)
108 instance Pretty Pair
109
110 -- * Type 'Token'
111 data Token
112 = TokenText !TL.Text
113 | TokenEscape !Char -- ^ @\\char@
114 | TokenLink !Link
115 | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@
116 | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@
117 deriving (Eq,Show)
118
119 {-
120 -- * Type 'Filter'
121 data Filter
122 = FilterAnd !Filter !Filter
123 | FilterOr !Filter !Filter
124 | FilterNot !Filter
125 | FilterTag !Ref
126 | FilterAt !Ref
127 deriving (Eq,Ord,Show)
128 -}
129
130 -- ** Type 'Link'
131 type Link = TL.Text
132
133 -- ** Type 'Ref'
134 type Ref = TL.Text
135
136 -- * Type 'Row'
137 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
138 type Row = [Root]
139
140 -- ** Type 'Rows'
141 -- | In reverse order: a list of nodes in scope
142 -- (hence to which the next line can append to).
143 type Rows = [Root]
144
145 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
146 -- one can always put the last 'Root' as a child to a previous one.
147 -- This 'Root' just has to be discarded by 'collapseRows'.
148 initRows :: Rows
149 initRows = [Tree0 $ Sourced (FileRange "" p p :| []) $ NodeHeader HeaderDash]
150 where p = FilePos{filePos_line= -1, filePos_column=0}
151 -- NOTE: such that any following 'Root'
152 -- is 'NodePara' if possible, and always a child.
153
154 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
155 --
156 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
157 -- they MAY span over multilines, and they can be many from a single line.
158 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
159 -- they MUST span only over a single and entire line.
160 --
161 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
162 mergeRow :: Rows -> Row -> Rows
163 mergeRow rows row =
164 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
165 mergeRowPrefix 0 rows $ List.reverse row
166
167 -- | Merge by considering matching prefixes.
168 --
169 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
170 -- but on their vertical alignment as prefixes.
171 -- Hence, each new 'Row' has those prefixes zipped into a single one
172 -- when they match, are aligned and adjacent.
173 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
174 mergeRowPrefix col rows row =
175 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
176 case (row,rows) of
177 ([], _) -> rows
178 (_, []) -> undefined -- NOTE: cannot happen with initRows
179 ( _new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news
180 , _old@(Tree (Sourced (FileRange _fo _bo eo:|_so) _o) _os):_olds ) ->
181 case collapseRowsWhile isCollapsable rows of
182 [] -> mergeRowIndent rows row
183 head@(unTree -> ch@(Sourced (FileRange _fh bh _eh:|_sh) h)) : olds' ->
184 case (n,h) of
185 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
186 -- then check if there is a matching HeaderGreat,
187 -- if so, discard new and restart with a col advanced to new's beginning
188 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
189 | isAdjacent && isMatching ch -> discard
190 -- NOTE: same for HeaderBar
191 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
192 | isAdjacent && isMatching ch -> discard
193 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
194 -- makes it collapse entirely
195 (_, NodeHeader HeaderGreat{})
196 | col < filePos_column bh -> collapse
197 -- NOTE: same for HeaderBar
198 (_, NodeHeader HeaderBar{})
199 | col < filePos_column bh -> collapse
200 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
201 where
202 isAdjacent = filePos_line bn - filePos_line eo <= 1
203 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (filePos_column bh) rows news
204 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
205 where
206 isMatching (Sourced (FileRange _fh bh _eh:|_sh) h) =
207 filePos_column bn == filePos_column bh &&
208 n == h
209 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
210 \_t0@(unTree -> c0@(Sourced (FileRange _f0 b0 _e0:|_s0) _n0))
211 _t1@(unTree -> Sourced (FileRange _f1 b1 e1:|_s1) _n1) ->
212 not (isMatching c0) &&
213 (filePos_line b0 - filePos_line e1 <= 1) && -- adjacent
214 col < filePos_column b1 -- righter than col
215
216 -- | Merge by considering indentation.
217 mergeRowIndent :: Rows -> Row -> Rows
218 mergeRowIndent rows row =
219 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
220 case (row,rows) of
221 ([], _) -> rows
222 (_, []) -> undefined -- NOTE: cannot happen with initRows
223 ( new@(Tree (Sourced ssn@(FileRange fn bn en:|sn) n) ns):news
224 ,old@(Tree (Sourced sso@(FileRange fo bo eo:|so) o) os):olds ) ->
225 case debug0 "mergeRowIndent/colNew" (filePos_column bn) `compare`
226 debug0 "mergeRowIndent/colOld" (filePos_column bo) of
227 -- NOTE: new is on the left
228 LT ->
229 case (n,o) of
230 -- NOTE: merge adjacent NodeText
231 -- first
232 -- second
233 (NodeText tn, NodeText to)
234 | TL.null tn || TL.null to
235 , not isVerbatim -> collapse
236 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
237 where
238 t = NodeText <$> Sourced (FileRange fo boNew eo:|so) (indent<>to) <> Sourced ssn tn
239 boNew = bo{filePos_column=filePos_column bn}
240 indent = TL.replicate (int64 $ filePos_column bo - filePos_column bn) " "
241 -- | Whether the horizontal delta is made of spaces
242 isIndented =
243 debug0 "mergeRowIndent/isIndented" $
244 case olds of
245 [] -> True
246 (unTree -> (source -> (fileRange_end -> ep) :| _)) : _ ->
247 case filePos_line ep `compare` filePos_line bo of
248 LT -> True
249 EQ -> filePos_column ep <= filePos_column bn
250 _ -> False
251 _ -> collapse
252 -- NOTE: new is vertically aligned
253 EQ ->
254 case (n,o) of
255 -- NOTE: preserve all NodeText "", but still split into two NodePara
256 (NodeText tn, NodeText to)
257 | TL.null tn || TL.null to
258 , not isVerbatim -> collapse
259 | isAdjacent -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns)
260 -- NOTE: HeaderSection can parent Nodes at the same level
261 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
262 if debug0 "mergeRowIndent/lvlNew" lvlNew
263 > debug0 "mergeRowIndent/lvlOld" lvlOld
264 -- # old
265 -- ## new
266 then concat
267 -- ## old or # old
268 -- # new # new
269 else collapse
270 -- NOTE: old is no HeaderSection, then collapse to any older and loop
271 (NodeHeader HeaderSection{}, _)
272 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
273 , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec ->
274 mergeRowIndent rows' row
275 where
276 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
277 \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_ss0) n0) _t1 ->
278 case n0 of
279 NodeHeader HeaderSection{} -> False
280 _ -> filePos_column bn == filePos_column b0
281 -- NOTE: in case of alignment, HeaderSection is parent
282 (_, NodeHeader HeaderSection{}) -> concat
283 --
284 _ -> replace
285 -- NOTE: new is on the right
286 GT ->
287 case (n,o) of
288 -- NOTE: keep NodeText "" out of old NodePara
289 (NodeText "", NodePara) -> collapse
290 -- NOTE: merge adjacent NodeText
291 (NodeText tn, NodeText to) ->
292 case isAdjacent of
293 _ | TL.null tn || TL.null to
294 , not isVerbatim -> collapse
295 -- old
296 -- new
297 True -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns)
298 -- old
299 --
300 -- new
301 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
302 where
303 shifted = Tree (Sourced (FileRange fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns)
304 bnNew = bn{filePos_column=filePos_column bo}
305 indent = TL.replicate (int64 $ filePos_column bn - filePos_column bo) " "
306 --
307 _ -> concat
308 where
309 isAdjacent = filePos_line bn - filePos_line eo <= 1
310 -- | Whether a parent semantic want new to stay a NodeText
311 isVerbatim = any p rows
312 where
313 p (unTree -> (unSourced -> NodeHeader HeaderBar{})) = True
314 p _ = False
315 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
316 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
317 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
318 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
319
320 -- | Like 'mergeRowIndent', but without maintaining the appending,
321 -- hence collapsing all the 'Root's of the given 'Rows'.
322 --
323 -- NOTE: 'initRows' MUST have been the first 'Rows'
324 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
325 collapseRows :: Rows -> Roots
326 collapseRows rows =
327 debug1_ "collapseRows" ("rows",rows) $
328 case collapseRowsWhile (\_new _old -> True) rows of
329 [t] -> subTrees t
330 _ -> undefined
331 -- NOTE: subTrees returns the children of the updated initRows
332
333 -- | Collapse downto any last HeaderSection, returning it and its level.
334 collapseSection :: ColNum -> Rows -> Rows
335 collapseSection col = debug1 "collapseSection" "rows" go
336 where
337 go rows@(new@(unTree -> Sourced (FileRange _fn bn _en:|_sn) n):olds)
338 | col <= filePos_column bn =
339 case n of
340 NodeHeader HeaderSection{} -> rows
341 _ -> collapseSection col $ collapseRoot new $ go olds
342 go _ = mempty
343
344 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
345 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
346 [] -> mempty
347 rows@(new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news) ->
348 case news of
349 [] -> rows
350 old@(Tree (Sourced (FileRange _fo bo eo:|_so) o) _os):olds
351 | not $ test new old -> rows
352 | otherwise ->
353 case debug0 "collapseRowsWhile/colNew" (filePos_column bn) `compare`
354 debug0 "collapseRowsWhile/colOld" (filePos_column bo) of
355 -- NOTE: new is vertically aligned
356 EQ ->
357 case (n,o) of
358 -- NOTE: HeaderSection can parent Nodes at the same level
359 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
360 if debug0 "collapseRowsWhile/lvlNew" lvlNew
361 > debug0 "collapseRowsWhile/lvlOld" lvlOld
362 -- # old
363 -- ## new
364 then collapse
365 -- ## old or # old
366 -- # new # new
367 else
368 debug "collapseRowsWhile/replace" $
369 collapseRowsWhile test $ (new:) $ collapseRoot old olds
370 -- NOTE: old is no HeaderSection, then collapse to any older and loop
371 (NodeHeader HeaderSection{}, _)
372 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
373 , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec ->
374 collapseRowsWhile test news'
375 where
376 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
377 \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_s0) n0) _t1 ->
378 case n0 of
379 NodeHeader HeaderSection{} -> False
380 _ -> filePos_column bn == filePos_column b0
381 -- NOTE: in case of alignment, HeaderSection is parent
382 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
383 -- NOTE: merge within old NodePara.
384 (_, NodePara) | isAdjacent -> collapse
385 --
386 _ -> collapse2
387 -- NOTE: new is either on the left or on the right
388 _ -> collapse
389 where
390 isAdjacent = filePos_line bn - filePos_line eo <= 1
391 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
392 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
393
394 -- | Put a 'Root' as a child of the head 'Root'.
395 --
396 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
397 --
398 -- NOTE: any NodeText/NodeText merging must have been done before.
399 collapseRoot :: Root -> Rows -> Rows
400 collapseRoot new@(Tree (Sourced ssn@(FileRange _fn bn en:|_sn) n) _ns) rows =
401 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
402 case rows of
403 [] -> return new
404 old@(Tree (Sourced (FileRange fo bo eo:|so) o) os) : olds ->
405 case (n,o) of
406 -- NOTE: no child into NodeText
407 (_, NodeText{}) -> collapse2
408 -- NOTE: NodeText can begin a NodePara
409 (NodeText tn, _) | not $ TL.null tn ->
410 case o of
411 -- NOTE: no NodePara within those
412 NodeHeader HeaderEqual{} -> collapse
413 NodeHeader HeaderBar{} -> collapse
414 NodeHeader HeaderDashDash{} -> collapse
415 -- NOTE: NodePara within those
416 NodePara | not isAdjacent -> para
417 NodeHeader{} -> para
418 _ -> collapse
419 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
420 -- Note that since a NodePara is never adjacent to another,
421 -- it is not nested within another.
422 (_, NodePara)
423 | isAdjacent ->
424 case n of
425 -- NOTE: no HeaderSection (even adjacent) within a NodePara
426 NodeHeader HeaderSection{} -> collapse2
427 _ -> collapse
428 | otherwise -> collapse2
429 _ -> collapse
430 where
431 isAdjacent = filePos_line bn - filePos_line eo <= 1
432 para = Tree (Sourced ssn NodePara) (return new) : rows
433 collapse = Tree (Sourced (FileRange fo bo en:|so) o) (os |> new) : olds
434 collapse2 = collapseRoot new $ collapseRoot old olds