]> Git — Sourcephile - doclang.git/blob - Language/TCT/Tree.hs
Fix HeaderGreat parsing.
[doclang.git] / Language / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.Monoid (Monoid(..))
18 import Data.Ord (Ordering(..), Ord(..))
19 import Data.Semigroup (Semigroup(..))
20 import Data.Sequence ((|>), ViewR(..))
21 import Data.TreeSeq.Strict (Tree(..), Trees)
22 import Prelude (undefined, Num(..))
23 import System.FilePath (FilePath)
24 import Text.Show (Show(..))
25 import qualified Data.List as List
26 import qualified Data.Sequence as Seq
27 import qualified Data.Text.Lazy as TL
28
29 import Language.TCT.Utils
30 import Language.TCT.Cell
31 import Language.TCT.Elem
32 import Language.TCT.Debug
33
34 -- * Type 'Root'
35 -- | A single 'Tree' to rule all the 'Node's
36 -- simplifies the navigation and transformations.
37 --
38 -- For error reporting, each 'Node' is annotated with a 'Cell'
39 -- spanning over all its content (sub-'Trees' included).
40 type Root = Tree (Cell Node)
41 type Roots = Trees (Cell Node)
42
43 pattern Tree0 :: a -> Tree a
44 pattern Tree0 a <- Tree a (null -> True)
45 where Tree0 a = Tree a mempty
46
47 -- * Type 'Node'
48 data Node
49 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
50 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
51 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
52 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
53 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
54 | NodePara -- ^ node, gather trees by paragraph,
55 -- useful to know when to generate a <para> XML node
56 | NodeGroup -- ^ node, group trees into a single tree,
57 -- useful to return many trees when only one is expected
58 deriving (Eq,Show)
59 instance Pretty Node
60
61 -- * Type 'Header'
62 data Header
63 = HeaderColon !Name !White -- ^ @name: @
64 | HeaderEqual !Name !White -- ^ @name=@
65 | HeaderBar !Name !White -- ^ @name|@
66 | HeaderGreat !Name !White -- ^ @name>@
67 | HeaderBrackets !Name -- ^ @[name]@
68 | HeaderDot !Name -- ^ @1. @
69 | HeaderDash -- ^ @- @
70 | HeaderDashDash -- ^ @-- @
71 | HeaderSection !LevelSection -- ^ @# @
72 | HeaderDotSlash !FilePath -- ^ @./file @
73 deriving (Eq, Ord, Show)
74 instance Pretty Header
75
76 -- ** Type 'Name'
77 type Name = TL.Text
78
79 -- ** Type 'LevelSection'
80 type LevelSection = Int
81
82 -- * Type 'Pair'
83 data Pair
84 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
85 | PairHash -- ^ @#value#@
86 | PairStar -- ^ @*value*@
87 | PairSlash -- ^ @/value/@
88 | PairUnderscore -- ^ @_value_@
89 | PairDash -- ^ @-value-@
90 | PairBackquote -- ^ @`value`@
91 | PairSinglequote -- ^ @'value'@
92 | PairDoublequote -- ^ @"value"@
93 | PairFrenchquote -- ^ @«value»@
94 | PairParen -- ^ @(value)@
95 | PairBrace -- ^ @{value}@
96 | PairBracket -- ^ @[value]@
97 deriving (Eq,Ord,Show)
98 instance Pretty Pair
99
100 -- * Type 'Token'
101 data Token
102 = TokenText !TL.Text
103 | TokenEscape !Char
104 | TokenLink !Link
105 | TokenTag !Tag
106 deriving (Eq,Show)
107
108 -- ** Type 'Tag'
109 type Tag = TL.Text
110
111 -- ** Type 'Link'
112 type Link = TL.Text
113
114 -- * Type 'Row'
115 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
116 type Row = [Root]
117
118 -- ** Type 'Rows'
119 -- | In reverse order: a list of nodes in scope
120 -- (hence to which the next line can append to).
121 type Rows = [Root]
122
123 -- | Having an initial 'Root' simplifies 'appendRow':
124 -- one can always put the last 'Root' as a child to a previous one.
125 -- This 'Root' just has to be discarded by 'collapseRows'.
126 initRows :: Rows
127 initRows = [Tree0 (Cell p p NodeGroup)]
128 where p = pos1{pos_line= -1, pos_column=0}
129 -- NOTE: such that any following 'Root'
130 -- is 'NodePara' if possible, and always a child.
131
132 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
133 --
134 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
135 -- they MAY span over multilines, and they can be many from a single line.
136 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
137 -- they MUST span only over a single and entire line.
138 --
139 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
140 mergeRow :: Rows -> Row -> Rows
141 mergeRow rows row =
142 debug2_ "mergeRow" ("news",row) ("olds",rows) $
143 zipRow 0 rows $ List.reverse row
144
145 -- | 'HeaderGreat' and 'HeaderBar' work, not on indentation,
146 -- but on their vertical alignment as prefixes.
147 -- Hence, each new 'Row' has those prefixes zipped into a single one
148 -- when they match and are aligned.
149 zipRow :: ColNum -> Rows -> Row -> Rows
150 zipRow col rows row =
151 debug3_ "zipRow" ("col",col) ("news",row) ("olds",rows) $
152 case (row,rows) of
153 ([], _) -> rows
154 (_, []) -> undefined -- NOTE: cannot happen with initRows
155 ( _new@(Tree (Cell bn _en n) _ns):news
156 , _old@(Tree (Cell _bo eo _o) _os):_olds ) ->
157 case collapseRowsWhile isCollapsable rows of
158 [] -> appendRow rows row
159 head@(unTree -> Cell bh _eh h) : olds' ->
160 case (n,h) of
161 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
162 -- then check if there is a matching HeaderGreat,
163 -- if so, discard new and restart with a col advanced to new's beginning
164 (NodeHeader hn@HeaderGreat{}, NodeHeader hh@HeaderGreat{})
165 | pos_column bn == pos_column bh
166 , isAdjacent
167 , hn == hh -> discard
168 -- NOTE: same for HeaderBar
169 (NodeHeader hn@HeaderBar{}, NodeHeader hh@HeaderBar{})
170 | pos_column bn == pos_column bh
171 , isAdjacent
172 , hn == hh -> discard
173 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
174 -- makes it collapse entirely
175 (_, NodeHeader HeaderGreat{})
176 | col < pos_column bh -> collapse
177 -- NOTE: same for HeaderBar
178 (_, NodeHeader HeaderBar{})
179 | col < pos_column bh -> collapse
180 _ -> debug "zipRow/append" $ appendRow rows row
181 where
182 discard = debug "zipRow/discard" $ zipRow (pos_column bh) rows news
183 collapse = debug "zipRow/collapse" $ zipRow col (collapseRoot head olds') row
184 isAdjacent = pos_line bn - pos_line eo <= 1
185 where
186 isCollapsable = -- debug2 "zipRow/isCollapsable" "new" "old" $
187 \_new@(unTree -> Cell bn _en _n) _old@(unTree -> Cell bo eo _o) ->
188 (pos_line bn - pos_line eo <= 1) && -- adjacent
189 col < pos_column bo -- righter than col
190
191 appendRow :: Rows -> Row -> Rows
192 appendRow rows row =
193 debug2_ "appendRow" ("news",row) ("olds",rows) $
194 case (row,rows) of
195 ([], _) -> rows
196 (_, []) -> undefined -- NOTE: cannot happen with initRows
197 ( new@(Tree (Cell bn en n) ns):news
198 ,old@(Tree (Cell bo eo o) os):olds ) ->
199 case debug0 "appendRow/colNew" (pos_column bn) `compare`
200 debug0 "appendRow/colOld" (pos_column bo) of
201 -- NOTE: new is on the left
202 LT ->
203 case (n,o) of
204 -- NOTE: merge adjacent NodeText
205 -- first
206 -- second
207 (NodeText tn, NodeText to)
208 | TL.null tn || TL.null to
209 , not isVerbatim -> collapse
210 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
211 where
212 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
213 boNew = bo{pos_column=pos_column bn}
214 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
215 -- | Whether the horizontal delta is made of spaces
216 isIndented =
217 debug0 "appendRow/isIndented" $
218 case olds of
219 [] -> True
220 (unTree -> cell_end -> ep) : _ ->
221 case pos_line ep `compare` pos_line bo of
222 LT -> True
223 EQ -> pos_column ep <= pos_column bn
224 _ -> False
225 _ -> collapse
226 -- NOTE: new is vertically aligned
227 EQ ->
228 case (n,o) of
229 -- NOTE: preserve all NodeText "", but still split into two NodePara
230 (NodeText tn, NodeText to)
231 | TL.null tn || TL.null to
232 , not isVerbatim -> collapse
233 | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
234 -- NOTE: HeaderSection can parent Nodes at the same level
235 (NodeHeader (HeaderSection lvlNew), _)
236 | rows'@(old':olds') <- collapseRowsWhile isCollapsable rows
237 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- old' ->
238 if debug0 "appendRow/lvlNew" lvlNew
239 > debug0 "appendRow/lvlOld" lvlOld
240 then -- # old'
241 -- ## new
242 {-concat using old'-} List.reverse row <> rows'
243 else -- ## old' or # old'
244 -- # new # new
245 {-collapse using old'-} appendRow (collapseRoot old' olds') row
246 where
247 isCollapsable = -- debug2 "appendRow/isCollapsable" "new" "old" $
248 \_new _old@(unTree -> Cell bt _et t) ->
249 case t of
250 NodeHeader HeaderSection{} -> False
251 _ -> pos_column bt == pos_column bn
252 -- NOTE: in case of alignment, HeaderSection is parent
253 (_, NodeHeader HeaderSection{}) -> concat
254 --
255 _ -> replace
256 -- NOTE: new is on the right
257 GT ->
258 case (n,o) of
259 -- NOTE: only same line Root can be pushed on HeaderBar
260 -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse
261 -- NOTE: keep NodeText "" out of old NodePara
262 (NodeText "", NodePara) -> collapse
263 -- NOTE: merge adjacent NodeText
264 (NodeText tn, NodeText to) ->
265 case isAdjacent of
266 _ | TL.null tn || TL.null to
267 , not isVerbatim -> collapse
268 -- old
269 -- new
270 True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
271 -- old
272 --
273 -- new
274 False -> appendRow (collapseRoot old olds) (shifted:news)
275 where
276 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
277 bnNew = bn{pos_column=pos_column bo}
278 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
279 --
280 _ -> concat
281 where
282 isAdjacent = pos_line bn - pos_line eo <= 1
283 -- | Whether a parent semantic want new to stay a NodeText
284 isVerbatim = any p rows
285 where
286 p (unTree -> unCell -> NodeHeader HeaderBar{}) = True
287 p _ = False
288 concat = debug "appendRow/concat" $ List.reverse row <> rows
289 merge m = debug "appendRow/merge" $ appendRow (m : olds) news
290 collapse = debug "appendRow/collapse" $ appendRow (collapseRoot old olds) row
291 replace = debug "appendRow/replace" $ appendRow (new : collapseRoot old olds) news
292
293 -- | Like 'appendRow', but without maintaining the appending,
294 -- hence collapsing all the 'Root's of the given 'Rows'.
295 --
296 -- NOTE: 'initRows' MUST have been the first 'Rows'
297 -- before calling 'appendRow' on it to get the given 'Rows'.
298 collapseRows :: Rows -> Roots
299 collapseRows rows =
300 case collapseRowsWhile (\_new _old -> True) rows of
301 [t] -> subTrees t
302 _ -> undefined
303 -- NOTE: subTrees returns the children of the updated initRows
304
305 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
306 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
307 [] -> mempty
308 rows@(new@(Tree (Cell bn _en n) _ns):olds) ->
309 case olds of
310 [] -> rows
311 --
312 old@(Tree (Cell bo eo o) _os):oldss
313 | not $ test new old -> rows
314 | otherwise ->
315 case debug0 "colNew" (pos_column bn) `compare`
316 debug0 "colOld" (pos_column bo) of
317 -- NOTE: new is vertically aligned
318 EQ ->
319 case (n,o) of
320 -- NOTE: HeaderSection can parent Nodes at the same level
321 (NodeHeader (HeaderSection lvlNew), _)
322 | old':olds' <- collapseRowsWhile isCollapsable olds
323 , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- old' ->
324 if debug0 "collapseRowsWhile/lvlNew" lvlNew
325 > debug0 "collapseRowsWhile/lvlOld" lvlOld
326 then -- # old'
327 -- ## new
328 collapseRowsWhile test $ collapseRoot new $ old':olds'
329 else -- ## old' or # old'
330 -- # new # new
331 collapseRowsWhile test $ new:collapseRoot old' olds'
332 where
333 isCollapsable =
334 \_new _old@(unTree -> Cell bt _et t) ->
335 case t of
336 NodeHeader HeaderSection{} -> False
337 _ -> pos_column bt == pos_column bn
338 -- NOTE: in case of alignment, HeaderSection is parent
339 (_, NodeHeader HeaderSection{}) -> collapse
340 -- NOTE: merge within old NodePara.
341 (_, NodePara{}) | isAdjacent -> collapse
342 --
343 _ -> collapse2
344 -- NOTE: new is either on the left or on the right
345 _ -> collapse
346 where
347 isAdjacent = pos_line bn - pos_line eo <= 1
348 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new olds
349 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old oldss
350
351 -- | Put a 'Root' as a child of the head 'Root'.
352 --
353 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
354 -- NOTE: any NodeText/NodeText merging must have been done before.
355 collapseRoot :: Root -> Rows -> Rows
356 collapseRoot new@(Tree (Cell bn en n) _ns) rows =
357 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
358 case rows of
359 [] -> return new
360 old@(Tree (Cell bo eo o) os) : olds ->
361 case (n,o) of
362 -- NOTE: never put a NodePara directly within another
363 (NodePara, NodePara) -> collapse2
364 -- NOTE: never put a collapse to NodeText, except some NodeHeader to preserve them
365 (_, NodeText{}) -> collapse2
366 -- NOTE: NodeText can begin a NodePara
367 (NodeText tn, _) | not $ TL.null tn ->
368 case o of
369 -- NOTE: no NodePara within those
370 NodeHeader HeaderEqual{} -> collapse
371 NodeHeader HeaderBar{} -> collapse
372 NodeHeader HeaderDashDash{} -> collapse
373 -- NOTE: NodePara within those
374 NodePara | not isAdjacent -> para
375 NodeHeader{} -> para
376 NodeGroup -> para
377 _ -> collapse
378 _ -> collapse
379 where
380 isAdjacent = pos_line bn - pos_line eo <= 1
381 para = Tree (Cell bn en NodePara) (return new) : rows
382 collapse = Tree (Cell bo en o) (os |> new) : olds
383 collapse2 = collapseRoot new $ collapseRoot old olds
384
385 -- | Return a 'Tree' from a 'Cell' node and 'subTrees',
386 -- while adjusting the node's 'cell_end'
387 -- with the last 'Tree' of the 'subTrees'.
388 tree :: Cell a -> Trees (Cell a) -> Tree (Cell a)
389 tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts
390 where
391 ep' = case Seq.viewr ts of
392 EmptyR -> ep
393 _ :> (unTree -> cell_end -> p) -> p