1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
4 module Language.TCT.Tree
5 ( module Language.TCT.Tree
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..))
14 import Data.Function (($))
15 import Data.Functor ((<$>))
17 import Data.Maybe (Maybe(..))
18 import Data.Monoid (Monoid(..))
19 import Data.Ord (Ordering(..), Ord(..))
20 import Data.Semigroup (Semigroup(..))
21 import Data.Sequence ((|>), ViewR(..))
22 import Data.TreeSeq.Strict (Tree(..), Trees)
23 import Prelude (undefined, Num(..))
24 import System.FilePath (FilePath)
25 import Text.Show (Show(..))
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text.Lazy as TL
30 import Language.TCT.Utils
31 import Language.TCT.Cell
32 import Language.TCT.Elem
33 import Language.TCT.Debug
36 -- | A single 'Tree' to rule all the 'Node's
37 -- simplifies the navigation and transformations.
39 -- For error reporting, each 'Node' is annotated with a 'Cell'
40 -- spanning over all its content (sub-'Trees' included).
41 type Root = Tree (Cell Node)
42 type Roots = Trees (Cell Node)
44 pattern Tree0 :: a -> Tree a
45 pattern Tree0 a <- Tree a (null -> True)
46 where Tree0 a = Tree a mempty
50 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
51 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
52 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
53 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
54 | NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
55 | NodePara -- ^ node, gather trees by paragraph,
56 -- useful to know when to generate a <para> XML node
57 | NodeGroup -- ^ node, group trees into a single tree,
58 -- useful to return many trees when only one is expected
64 = HeaderColon !Name !White -- ^ @name: @
65 | HeaderEqual !Name !White -- ^ @name=@
66 | HeaderBar !Name !White -- ^ @name|@
67 | HeaderGreat !Name !White -- ^ @name>@
68 | HeaderBrackets !Name -- ^ @[name]@
69 | HeaderDot !Name -- ^ @1. @
70 | HeaderDash -- ^ @- @
71 | HeaderDashDash -- ^ @-- @
72 | HeaderSection !LevelSection -- ^ @# @
73 | HeaderDotSlash !FilePath -- ^ @./file @
74 deriving (Eq, Ord, Show)
75 instance Pretty Header
80 -- ** Type 'LevelSection'
81 type LevelSection = Int
85 = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
86 | PairHash -- ^ @#value#@
87 | PairStar -- ^ @*value*@
88 | PairSlash -- ^ @/value/@
89 | PairUnderscore -- ^ @_value_@
90 | PairDash -- ^ @-value-@
91 | PairBackquote -- ^ @`value`@
92 | PairSinglequote -- ^ @'value'@
93 | PairDoublequote -- ^ @"value"@
94 | PairFrenchquote -- ^ @«value»@
95 | PairParen -- ^ @(value)@
96 | PairBrace -- ^ @{value}@
97 | PairBracket -- ^ @[value]@
98 deriving (Eq,Ord,Show)
116 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
120 -- | In reverse order: a list of nodes in scope
121 -- (hence to which the next line can append to).
124 -- | Having an initial 'Root' simplifies 'appendRow':
125 -- one can always put the last 'Root' as a child to a previous one.
126 -- This 'Root' just has to be discarded by 'collapseRows'.
128 initRows = [Tree0 (Cell p p NodeGroup)]
129 where p = pos1{pos_line= -1, pos_column=0}
130 -- NOTE: such that any following 'Root'
131 -- is 'NodePara' if possible, and always a child.
133 -- | @appendRow rows row@ appends @row@ to @rows@.
135 -- [@rows@] parent 'Rows', from closest to farthest (non-strictly descending)
136 -- [@row@] next 'Row', from leftest column to rightest (non-stricly ascending)
137 appendRow :: Rows -> Row -> Rows
139 debug2_ "appendRow" ("news",row) ("olds",rows) $
141 (_, []) -> undefined -- NOTE: cannot happen with initRows
143 (new@(Tree (Cell bn en n) ns):news, old@(Tree (Cell bo eo o) os):olds) ->
144 case debug0 "appendRow/colNew" (pos_column bn) `compare`
145 debug0 "appendRow/colOld" (pos_column bo) of
146 -- NOTE: new is vertically lower
149 -- NOTE: merge adjacent NodeText
152 (NodeText tn, NodeText to)
153 | TL.null tn || TL.null to -> child
154 | not isNewPara && isIndented -> merge $ Tree t (os<>ns)
156 t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
157 boNew = bo{pos_column=pos_column bn}
158 indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
159 -- | Whether the horizontal diff is made of spaces
161 debug0 "appendRow/isIndented" $
164 (unTree -> cell_end -> ep) : _ ->
165 case pos_line ep `compare` pos_line bo of
167 EQ -> pos_column ep <= pos_column bn
170 -- NOTE: new is vertically aligned
173 -- NOTE: preserve all NodeText "", but still split into two NodePara
174 (NodeText tn, NodeText to)
175 | TL.null tn || TL.null to -> child
176 | not isNewPara -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
177 -- NOTE: HeaderSection can parent Nodes at the same level
178 (NodeHeader (HeaderSection lvlNew), _)
179 | Just (lvlOld, rows'@(old':olds')) <- collapseSection (pos_column bn) rows ->
180 if debug0 "appendRow/lvlNew" lvlNew
181 > debug0 "appendRow/lvlOld" lvlOld
184 {-concat-} List.reverse row <> rows'
185 else -- ## old or # old
187 {-child old'-} appendRow (appendChild old' olds') row
188 -- NOTE: concat everything else following a HeaderSection.
189 (_, NodeHeader HeaderSection{}) -> concat
191 (NodeHeader ho@HeaderGreat{}, NodeHeader hn) | ho == hn ->
192 debug "appendRow/HeaderGreat" $ appendRow rows news
196 -- NOTE: new is vertically greater
199 -- NOTE: keep NodeText "" out of old NodePara
200 (NodeText "", NodePara) -> child
201 -- NOTE: merge adjacent NodeText
202 (NodeText tn, NodeText to) ->
204 _ | TL.null tn || TL.null to -> child
208 True -> appendRow (appendChild old olds) (shifted:news)
210 shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
211 bnNew = bn{pos_column=pos_column bo}
212 indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
215 False -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
219 isNewPara = pos_line bn - pos_line eo > 1
220 concat = debug "appendRow/concat" $ List.reverse row <> rows
221 merge m = debug "appendRow/merge" $ appendRow (m : olds) news
222 child = debug "appendRow/child" $ appendRow (appendChild old olds) row
223 replace = debug "appendRow/replace" $ appendRow (new : appendChild old olds) news
225 -- | Collapse downto any last HeaderSection, returning it and its level.
226 collapseSection :: ColNum -> Rows -> Maybe (LevelSection,Rows)
227 collapseSection col = debug1 "collapseSection" "rows" go
229 go rows@(new@(unTree -> Cell bn _en n):olds)
230 | col == pos_column bn =
232 NodeHeader (HeaderSection lvl) -> return (lvl, rows)
233 _ -> (appendChild new <$>) <$> go olds
236 -- | Like 'appendRow', but without maintaining the appending,
237 -- hence collapsing all the 'Root's of the given 'Rows'.
239 -- NOTE: 'initRows' MUST have been the first 'Rows'
240 -- before calling 'appendRow' on it to get the given 'Rows'.
241 collapseRows :: Rows -> Roots
242 collapseRows = debug1 "collapseRows" "rows" $ \case
244 new@(Tree (Cell bn _en n) _ns):olds ->
247 old@(Tree (Cell bo eo o) _os):oldss ->
248 case debug0 "colNew" (pos_column bn) `compare`
249 debug0 "colOld" (pos_column bo) of
250 -- NOTE: new is vertically aligned
253 (NodeHeader (HeaderSection lvlNew), _)
254 | Just (lvlOld, old':olds') <- collapseSection (pos_column bn) olds ->
255 if debug0 "collapseRows/lvlNew" lvlNew
256 > debug0 "collapseRows/lvlOld" lvlOld
259 {-child new-} collapseRows $ appendChild new $ old':olds'
260 else -- ## old or # old
262 {-child old'-} collapseRows $ new:appendChild old' olds'
263 -- NOTE: in case of alignment, HeaderSection is parent.
264 (_, NodeHeader HeaderSection{}) -> child
265 -- NOTE: merge within old NodePara.
266 (_, NodePara{}) | not isNewPara -> child
269 -- NOTE: new is either vertically lower or greater
272 isNewPara = pos_line bn - pos_line eo > 1
273 child, child2 :: Roots
274 child = debug "collapseRows/child" $ collapseRows $ appendChild new olds
275 child2 = debug "collapseRows/child2" $ collapseRows $ appendChild new $ appendChild old oldss
277 -- | Put a 'Root' as a child of the head 'Root'.
279 -- NOTE: 'appendChild' is where 'NodePara' may be introduced.
280 -- NOTE: any NodeText/NodeText merging must have been done before.
281 appendChild :: Root -> Rows -> Rows
282 appendChild new@(Tree (Cell bn en n) _ns) rows =
283 debug2_ "appendChild" ("new",Seq.singleton new) ("rows",rows) $
286 old@(Tree (Cell bo eo o) os) : olds ->
288 -- NOTE: never put a NodePara directly within another
289 (NodePara, NodePara) -> child2
290 -- NOTE: never put a child to NodeText
291 (_, NodeText{}) -> child2
292 -- NOTE: NodeText can begin a NodePara
293 (NodeText tn, _) | not $ TL.null tn ->
295 -- NOTE: no NodePara within those
296 NodeHeader HeaderEqual{} -> child
297 NodeHeader HeaderBar{} -> child
298 NodeHeader HeaderDashDash{} -> child
299 -- NOTE: NodePara within those
300 NodePara | isNewPara -> para
306 isNewPara = pos_line bn - pos_line eo > 1
307 child = Tree (Cell bo en o) (os |> new) : olds
308 child2 = appendChild new $ appendChild old olds
309 para = Tree (Cell bn en NodePara) (return new) : rows
311 -- | Return a 'Tree' from a 'Cell' node and 'subTrees',
312 -- while adjusting the node's 'cell_end'
313 -- with the last 'Tree' of the 'subTrees'.
314 tree :: Cell a -> Trees (Cell a) -> Tree (Cell a)
315 tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts
317 ep' = case Seq.viewr ts of
319 _ :> (unTree -> cell_end -> p) -> p