]> Git — Sourcephile - doclang.git/blob - Language/TCT/Tree.hs
Fix NodePara parsing.
[doclang.git] / Language / TCT / Tree.hs
1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
6 , Tree(..), Trees
7 ) where
8
9 import Control.Monad (Monad(..))
10 import Data.Bool
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($))
15 import Data.Functor ((<$>))
16 import Data.Int (Int)
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ordering(..), Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence ((|>), ViewR(..))
22 import Data.TreeSeq.Strict (Tree(..), Trees)
23 import Prelude (undefined, Num(..))
24 import System.FilePath (FilePath)
25 import Text.Show (Show(..))
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
29
30 import Language.TCT.Utils
31 import Language.TCT.Cell
32 import Language.TCT.Elem
33 import Language.TCT.Debug
34
35 -- * Type 'Root'
36 -- | A single 'Tree' to rule all the 'Node's
37 -- simplifies the navigation and transformations.
38 --
39 -- For error reporting, each 'Node' is annotated with a 'Cell'
40 -- spanning over all its content (sub-'Trees' included).
41 type Root = Tree (Cell Node)
42 type Roots = Trees (Cell Node)
43
44 pattern Tree0 :: a -> Tree a
45 pattern Tree0 a <- Tree a (null -> True)
46 where Tree0 a = Tree a mempty
47
48 -- * Type 'Node'
49 data Node
50 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
51 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
52 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
53 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
54 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
55 | NodePara -- ^ node, gather trees by paragraph,
56 -- useful to know when to generate a <para> XML node
57 | NodeGroup -- ^ node, group trees into a single tree,
58 -- useful to return many trees when only one is expected
59 deriving (Eq,Show)
60 instance Pretty Node
61
62 -- * Type 'Header'
63 data Header
64 = HeaderColon !Name !White -- ^ @name: @
65 | HeaderEqual !Name !White -- ^ @name=@
66 | HeaderBar !Name !White -- ^ @name|@
67 | HeaderGreat !Name !White -- ^ @name>@
68 | HeaderBrackets !Name -- ^ @[name]@
69 | HeaderDot !Name -- ^ @1. @
70 | HeaderDash -- ^ @- @
71 | HeaderDashDash -- ^ @-- @
72 | HeaderSection !LevelSection -- ^ @# @
73 | HeaderDotSlash !FilePath -- ^ @./file @
74 deriving (Eq, Ord, Show)
75 instance Pretty Header
76
77 -- ** Type 'Name'
78 type Name = TL.Text
79
80 -- ** Type 'LevelSection'
81 type LevelSection = Int
82
83 -- * Type 'Pair'
84 data Pair
85 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
86 | PairHash -- ^ @#value#@
87 | PairStar -- ^ @*value*@
88 | PairSlash -- ^ @/value/@
89 | PairUnderscore -- ^ @_value_@
90 | PairDash -- ^ @-value-@
91 | PairBackquote -- ^ @`value`@
92 | PairSinglequote -- ^ @'value'@
93 | PairDoublequote -- ^ @"value"@
94 | PairFrenchquote -- ^ @«value»@
95 | PairParen -- ^ @(value)@
96 | PairBrace -- ^ @{value}@
97 | PairBracket -- ^ @[value]@
98 deriving (Eq,Ord,Show)
99 instance Pretty Pair
100
101 -- * Type 'Token'
102 data Token
103 = TokenText !TL.Text
104 | TokenEscape !Char
105 | TokenLink !Link
106 | TokenTag !Tag
107 deriving (Eq,Show)
108
109 -- ** Type 'Tag'
110 type Tag = TL.Text
111
112 -- ** Type 'Link'
113 type Link = TL.Text
114
115 -- * Type 'Row'
116 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
117 type Row = [Root]
118
119 -- ** Type 'Rows'
120 -- | In reverse order: a list of nodes in scope
121 -- (hence to which the next line can append to).
122 type Rows = [Root]
123
124 -- | Having an initial 'Root' simplifies 'appendRow':
125 -- one can always put the last 'Root' as a child to a previous one.
126 -- This 'Root' just has to be discarded by 'collapseRows'.
127 initRows :: Rows
128 initRows = [Tree0 (Cell p p NodeGroup)]
129 where p = pos1{pos_line= -1, pos_column=0}
130 -- NOTE: such that any following 'Root'
131 -- is 'NodePara' if possible, and always a child.
132
133 -- | @appendRow rows row@ appends @row@ to @rows@.
134 --
135 -- [@rows@] parent 'Rows', from closest to farthest (non-strictly descending)
136 -- [@row@] next 'Row', from leftest column to rightest (non-stricly ascending)
137 appendRow :: Rows -> Row -> Rows
138 appendRow rows row =
139 debug2_ "appendRow" ("news",row) ("olds",rows) $
140 case (row,rows) of
141 (_, []) -> undefined -- NOTE: cannot happen with initRows
142 ([], _) -> rows
143 (new@(Tree (Cell bn en n) ns):news, old@(Tree (Cell bo eo o) os):olds) ->
144 case debug0 "appendRow/colNew" (pos_column bn) `compare`
145 debug0 "appendRow/colOld" (pos_column bo) of
146 -- NOTE: new is vertically lower
147 LT ->
148 case (n,o) of
149 -- NOTE: merge adjacent NodeText
150 -- first
151 -- second
152 (NodeText tn, NodeText to)
153 | TL.null tn || TL.null to -> child
154 | not isNewPara && isIndented -> merge $ Tree t (os<>ns)
155 where
156 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
157 boNew = bo{pos_column=pos_column bn}
158 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
159 -- | Whether the horizontal diff is made of spaces
160 isIndented =
161 debug0 "appendRow/isIndented" $
162 case olds of
163 [] -> True
164 (unTree -> cell_end -> ep) : _ ->
165 case pos_line ep `compare` pos_line bo of
166 LT -> True
167 EQ -> pos_column ep <= pos_column bn
168 _ -> False
169 _ -> child
170 -- NOTE: new is vertically aligned
171 EQ ->
172 case (n,o) of
173 -- NOTE: preserve all NodeText "", but still split into two NodePara
174 (NodeText tn, NodeText to)
175 | TL.null tn || TL.null to -> child
176 | not isNewPara -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
177 -- NOTE: HeaderSection can parent Nodes at the same level
178 (NodeHeader (HeaderSection lvlNew), _)
179 | Just (lvlOld, rows'@(old':olds')) <- collapseSection (pos_column bn) rows ->
180 if debug0 "appendRow/lvlNew" lvlNew
181 > debug0 "appendRow/lvlOld" lvlOld
182 then -- # old
183 -- ## new
184 {-concat-} List.reverse row <> rows'
185 else -- ## old or # old
186 -- # new # new
187 {-child old'-} appendRow (appendChild old' olds') row
188 -- NOTE: concat everything else following a HeaderSection.
189 (_, NodeHeader HeaderSection{}) -> concat
190 {-
191 (NodeHeader ho@HeaderGreat{}, NodeHeader hn) | ho == hn ->
192 debug "appendRow/HeaderGreat" $ appendRow rows news
193 -}
194 --
195 _ -> replace
196 -- NOTE: new is vertically greater
197 GT ->
198 case (n,o) of
199 -- NOTE: keep NodeText "" out of old NodePara
200 (NodeText "", NodePara) -> child
201 -- NOTE: merge adjacent NodeText
202 (NodeText tn, NodeText to) ->
203 case isNewPara of
204 _ | TL.null tn || TL.null to -> child
205 -- old
206 --
207 -- new
208 True -> appendRow (appendChild old olds) (shifted:news)
209 where
210 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
211 bnNew = bn{pos_column=pos_column bo}
212 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
213 -- old
214 -- new
215 False -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
216 --
217 _ -> concat
218 where
219 isNewPara = pos_line bn - pos_line eo > 1
220 concat = debug "appendRow/concat" $ List.reverse row <> rows
221 merge m = debug "appendRow/merge" $ appendRow (m : olds) news
222 child = debug "appendRow/child" $ appendRow (appendChild old olds) row
223 replace = debug "appendRow/replace" $ appendRow (new : appendChild old olds) news
224
225 -- | Collapse downto any last HeaderSection, returning it and its level.
226 collapseSection :: ColNum -> Rows -> Maybe (LevelSection,Rows)
227 collapseSection col = debug1 "collapseSection" "rows" go
228 where
229 go rows@(new@(unTree -> Cell bn _en n):olds)
230 | col == pos_column bn =
231 case n of
232 NodeHeader (HeaderSection lvl) -> return (lvl, rows)
233 _ -> (appendChild new <$>) <$> go olds
234 go _ = Nothing
235
236 -- | Like 'appendRow', but without maintaining the appending,
237 -- hence collapsing all the 'Root's of the given 'Rows'.
238 --
239 -- NOTE: 'initRows' MUST have been the first 'Rows'
240 -- before calling 'appendRow' on it to get the given 'Rows'.
241 collapseRows :: Rows -> Roots
242 collapseRows = debug1 "collapseRows" "rows" $ \case
243 [] -> mempty
244 new@(Tree (Cell bn _en n) _ns):olds ->
245 case olds of
246 [] -> subTrees new
247 old@(Tree (Cell bo eo o) _os):oldss ->
248 case debug0 "colNew" (pos_column bn) `compare`
249 debug0 "colOld" (pos_column bo) of
250 -- NOTE: new is vertically aligned
251 EQ ->
252 case (n,o) of
253 (NodeHeader (HeaderSection lvlNew), _)
254 | Just (lvlOld, old':olds') <- collapseSection (pos_column bn) olds ->
255 if debug0 "collapseRows/lvlNew" lvlNew
256 > debug0 "collapseRows/lvlOld" lvlOld
257 then -- # old
258 -- ## new
259 {-child new-} collapseRows $ appendChild new $ old':olds'
260 else -- ## old or # old
261 -- # new # new
262 {-child old'-} collapseRows $ new:appendChild old' olds'
263 -- NOTE: in case of alignment, HeaderSection is parent.
264 (_, NodeHeader HeaderSection{}) -> child
265 -- NOTE: merge within old NodePara.
266 (_, NodePara{}) | not isNewPara -> child
267 --
268 _ -> child2
269 -- NOTE: new is either vertically lower or greater
270 _ -> child
271 where
272 isNewPara = pos_line bn - pos_line eo > 1
273 child, child2 :: Roots
274 child = debug "collapseRows/child" $ collapseRows $ appendChild new olds
275 child2 = debug "collapseRows/child2" $ collapseRows $ appendChild new $ appendChild old oldss
276
277 -- | Put a 'Root' as a child of the head 'Root'.
278 --
279 -- NOTE: 'appendChild' is where 'NodePara' may be introduced.
280 -- NOTE: any NodeText/NodeText merging must have been done before.
281 appendChild :: Root -> Rows -> Rows
282 appendChild new@(Tree (Cell bn en n) _ns) rows =
283 debug2_ "appendChild" ("new",Seq.singleton new) ("rows",rows) $
284 case rows of
285 [] -> return new
286 old@(Tree (Cell bo eo o) os) : olds ->
287 case (n,o) of
288 -- NOTE: never put a NodePara directly within another
289 (NodePara, NodePara) -> child2
290 -- NOTE: never put a child to NodeText
291 (_, NodeText{}) -> child2
292 -- NOTE: NodeText can begin a NodePara
293 (NodeText tn, _) | not $ TL.null tn ->
294 case o of
295 -- NOTE: no NodePara within those
296 NodeHeader HeaderEqual{} -> child
297 NodeHeader HeaderBar{} -> child
298 NodeHeader HeaderDashDash{} -> child
299 -- NOTE: NodePara within those
300 NodePara | isNewPara -> para
301 NodeHeader{} -> para
302 NodeGroup -> para
303 _ -> child
304 _ -> child
305 where
306 isNewPara = pos_line bn - pos_line eo > 1
307 child = Tree (Cell bo en o) (os |> new) : olds
308 child2 = appendChild new $ appendChild old olds
309 para = Tree (Cell bn en NodePara) (return new) : rows
310
311 -- | Return a 'Tree' from a 'Cell' node and 'subTrees',
312 -- while adjusting the node's 'cell_end'
313 -- with the last 'Tree' of the 'subTrees'.
314 tree :: Cell a -> Trees (Cell a) -> Tree (Cell a)
315 tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts
316 where
317 ep' = case Seq.viewr ts of
318 EmptyR -> ep
319 _ :> (unTree -> cell_end -> p) -> p