1 {-# LANGUAGE OverloadedStrings #-}
2 {-# LANGUAGE PatternSynonyms #-}
3 {-# LANGUAGE ViewPatterns #-}
9 import Control.Monad (Monad(..))
11 import Data.Char (Char)
12 import Data.Eq (Eq(..))
13 import Data.Foldable (Foldable(..), any)
14 import Data.Function (($))
15 import Data.Functor ((<$>))
17 import Data.List.NonEmpty (NonEmpty(..))
18 import Data.Maybe (Maybe(..))
19 import Data.Monoid (Monoid(..))
20 import Data.Ord (Ordering(..), Ord(..))
21 import Data.Semigroup (Semigroup(..))
22 import Data.Sequence ((|>))
23 import Data.TreeSeq.Strict (Tree(..), Trees)
24 import Prelude (undefined, Num(..))
25 import System.FilePath (FilePath)
26 import Text.Show (Show(..))
27 import qualified Data.List as List
28 import qualified Data.Sequence as Seq
29 import qualified Data.Text.Lazy as TL
37 -- | A single 'Tree' to gather all the 'Node's
38 -- simplifies greatly the navigation and transformations,
39 -- especially because the later XML or DTC output
40 -- are themselves a single tree-like data structure.
42 -- Also, having a single 'Tree' is easier to merge
43 -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' ('Just' "li") ""))@),
44 -- and XML coming from the second parsing phase (eg. @'NodePair' ('PairElem' ('Just' "li") [])@).
46 -- For error reporting, indentation sensitivity and paragraph grouping,
47 -- each 'Node' is annotated with a 'Cell'
48 -- spanning over all its content (sub-'Trees' included).
49 type Root = Tree (Cell Node)
50 type Roots = Trees (Cell Node)
52 pattern Tree0 :: a -> Tree a
53 pattern Tree0 a <- Tree a (null -> True)
54 where Tree0 a = Tree a mempty
58 = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive)
59 | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive)
60 | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's)
61 | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's)
62 | NodeLower !ElemName !ElemAttrs -- ^ node, @<name a=b@
63 | NodePara -- ^ node, gather trees by paragraph,
64 -- useful to know when to generate a <para> XML node
70 = HeaderColon !(Maybe ElemName) !White -- ^ @name: @
71 | HeaderEqual !ElemName !White -- ^ @name=@
72 | HeaderBar !(Maybe ElemName) !White -- ^ @name|@
73 | HeaderGreat !(Maybe ElemName) !White -- ^ @name>@
74 | HeaderBrackets !TL.Text -- ^ @[name]@
75 | HeaderDot !TL.Text -- ^ @1. @
76 | HeaderDash -- ^ @- @
77 | HeaderDashDash -- ^ @-- @
78 | HeaderSection !LevelSection -- ^ @# @
79 | HeaderDotSlash !FilePath -- ^ @./file @
80 deriving (Eq, Ord, Show)
81 instance Pretty Header
88 -- ** Type 'LevelSection'
89 type LevelSection = Int
93 = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
94 | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@
95 | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@
96 | PairStar -- ^ @*text*@
97 | PairSlash -- ^ @/text/@
98 | PairUnderscore -- ^ @_value_@
99 | PairDash -- ^ @-text-@
100 | PairBackquote -- ^ @`text`@
101 | PairSinglequote -- ^ @'text'@
102 | PairDoublequote -- ^ @"text"@
103 | PairFrenchquote -- ^ @«text»@
104 | PairParen -- ^ @(text)@
105 | PairBrace -- ^ @{text}@
106 | PairBracket -- ^ @[text]@
107 deriving (Eq,Ord,Show)
113 | TokenEscape !Char -- ^ @\\char@
115 | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@
116 | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@
122 = FilterAnd !Filter !Filter
123 | FilterOr !Filter !Filter
127 deriving (Eq,Ord,Show)
137 -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line.
141 -- | In reverse order: a list of nodes in scope
142 -- (hence to which the next line can append to).
145 -- | Having an initial 'Root' simplifies 'mergeRowIndent':
146 -- one can always put the last 'Root' as a child to a previous one.
147 -- This 'Root' just has to be discarded by 'collapseRows'.
149 initRows = [Tree0 $ Sourced (FileRange "" p p :| []) $ NodeHeader HeaderDash]
150 where p = FilePos{filePos_line= -1, filePos_column=0}
151 -- NOTE: such that any following 'Root'
152 -- is 'NodePara' if possible, and always a child.
154 -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
156 -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
157 -- they MAY span over multilines, and they can be many from a single line.
158 -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
159 -- they MUST span only over a single and entire line.
161 -- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
162 mergeRow :: Rows -> Row -> Rows
164 debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
165 mergeRowPrefix 0 rows $ List.reverse row
167 -- | Merge by considering matching prefixes.
169 -- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
170 -- but on their vertical alignment as prefixes.
171 -- Hence, each new 'Row' has those prefixes zipped into a single one
172 -- when they match, are aligned and adjacent.
173 mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
174 mergeRowPrefix col rows row =
175 debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
178 (_, []) -> undefined -- NOTE: cannot happen with initRows
179 ( _new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news
180 , _old@(Tree (Sourced (FileRange _fo _bo eo:|_so) _o) _os):_olds ) ->
181 case collapseRowsWhile isCollapsable rows of
182 [] -> mergeRowIndent rows row
183 head@(unTree -> ch@(Sourced (FileRange _fh bh _eh:|_sh) h)) : olds' ->
185 -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
186 -- then check if there is a matching HeaderGreat,
187 -- if so, discard new and restart with a col advanced to new's beginning
188 (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
189 | isAdjacent && isMatching ch -> discard
190 -- NOTE: same for HeaderBar
191 (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
192 | isAdjacent && isMatching ch -> discard
193 -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
194 -- makes it collapse entirely
195 (_, NodeHeader HeaderGreat{})
196 | col < filePos_column bh -> collapse
197 -- NOTE: same for HeaderBar
198 (_, NodeHeader HeaderBar{})
199 | col < filePos_column bh -> collapse
200 _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
202 isAdjacent = filePos_line bn - filePos_line eo <= 1
203 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (filePos_column bh) rows news
204 collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
206 isMatching (Sourced (FileRange _fh bh _eh:|_sh) h) =
207 filePos_column bn == filePos_column bh &&
209 isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
210 \_t0@(unTree -> c0@(Sourced (FileRange _f0 b0 _e0:|_s0) _n0))
211 _t1@(unTree -> Sourced (FileRange _f1 b1 e1:|_s1) _n1) ->
212 not (isMatching c0) &&
213 (filePos_line b0 - filePos_line e1 <= 1) && -- adjacent
214 col < filePos_column b1 -- righter than col
216 -- | Merge by considering indentation.
217 mergeRowIndent :: Rows -> Row -> Rows
218 mergeRowIndent rows row =
219 debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
222 (_, []) -> undefined -- NOTE: cannot happen with initRows
223 ( new@(Tree (Sourced ssn@(FileRange fn bn en:|sn) n) ns):news
224 ,old@(Tree (Sourced sso@(FileRange fo bo eo:|so) o) os):olds ) ->
225 case debug0 "mergeRowIndent/colNew" (filePos_column bn) `compare`
226 debug0 "mergeRowIndent/colOld" (filePos_column bo) of
227 -- NOTE: new is on the left
230 -- NOTE: merge adjacent NodeText
233 (NodeText tn, NodeText to)
234 | TL.null tn || TL.null to
235 , not isVerbatim -> collapse
236 | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
238 t = NodeText <$> Sourced (FileRange fo boNew eo:|so) (indent<>to) <> Sourced ssn tn
239 boNew = bo{filePos_column=filePos_column bn}
240 indent = TL.replicate (int64 $ filePos_column bo - filePos_column bn) " "
241 -- | Whether the horizontal delta is made of spaces
243 debug0 "mergeRowIndent/isIndented" $
246 (unTree -> (source -> (fileRange_end -> ep) :| _)) : _ ->
247 case filePos_line ep `compare` filePos_line bo of
249 EQ -> filePos_column ep <= filePos_column bn
252 -- NOTE: new is vertically aligned
255 -- NOTE: preserve all NodeText "", but still split into two NodePara
256 (NodeText tn, NodeText to)
257 | TL.null tn || TL.null to
258 , not isVerbatim -> collapse
259 | isAdjacent -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns)
260 -- NOTE: HeaderSection can parent Nodes at the same level
261 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
262 if debug0 "mergeRowIndent/lvlNew" lvlNew
263 > debug0 "mergeRowIndent/lvlOld" lvlOld
270 -- NOTE: old is no HeaderSection, then collapse to any older and loop
271 (NodeHeader HeaderSection{}, _)
272 | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
273 , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec ->
274 mergeRowIndent rows' row
276 isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
277 \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_ss0) n0) _t1 ->
279 NodeHeader HeaderSection{} -> False
280 _ -> filePos_column bn == filePos_column b0
281 -- NOTE: in case of alignment, HeaderSection is parent
282 (_, NodeHeader HeaderSection{}) -> concat
285 -- NOTE: new is on the right
288 -- NOTE: keep NodeText "" out of old NodePara
289 (NodeText "", NodePara) -> collapse
290 -- NOTE: merge adjacent NodeText
291 (NodeText tn, NodeText to) ->
293 _ | TL.null tn || TL.null to
294 , not isVerbatim -> collapse
297 True -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns)
301 False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
303 shifted = Tree (Sourced (FileRange fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns)
304 bnNew = bn{filePos_column=filePos_column bo}
305 indent = TL.replicate (int64 $ filePos_column bn - filePos_column bo) " "
309 isAdjacent = filePos_line bn - filePos_line eo <= 1
310 -- | Whether a parent semantic want new to stay a NodeText
311 isVerbatim = any p rows
313 p (unTree -> (unSourced -> NodeHeader HeaderBar{})) = True
315 concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
316 merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
317 collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
318 replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
320 -- | Like 'mergeRowIndent', but without maintaining the appending,
321 -- hence collapsing all the 'Root's of the given 'Rows'.
323 -- NOTE: 'initRows' MUST have been the first 'Rows'
324 -- before calling 'mergeRowIndent' on it to get the given 'Rows'.
325 collapseRows :: Rows -> Roots
327 debug1_ "collapseRows" ("rows",rows) $
328 case collapseRowsWhile (\_new _old -> True) rows of
331 -- NOTE: subTrees returns the children of the updated initRows
333 -- | Collapse downto any last HeaderSection, returning it and its level.
334 collapseSection :: ColNum -> Rows -> Rows
335 collapseSection col = debug1 "collapseSection" "rows" go
337 go rows@(new@(unTree -> Sourced (FileRange _fn bn _en:|_sn) n):olds)
338 | col <= filePos_column bn =
340 NodeHeader HeaderSection{} -> rows
341 _ -> collapseSection col $ collapseRoot new $ go olds
344 collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
345 collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
347 rows@(new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news) ->
350 old@(Tree (Sourced (FileRange _fo bo eo:|_so) o) _os):olds
351 | not $ test new old -> rows
353 case debug0 "collapseRowsWhile/colNew" (filePos_column bn) `compare`
354 debug0 "collapseRowsWhile/colOld" (filePos_column bo) of
355 -- NOTE: new is vertically aligned
358 -- NOTE: HeaderSection can parent Nodes at the same level
359 (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
360 if debug0 "collapseRowsWhile/lvlNew" lvlNew
361 > debug0 "collapseRowsWhile/lvlOld" lvlOld
368 debug "collapseRowsWhile/replace" $
369 collapseRowsWhile test $ (new:) $ collapseRoot old olds
370 -- NOTE: old is no HeaderSection, then collapse to any older and loop
371 (NodeHeader HeaderSection{}, _)
372 | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
373 , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec ->
374 collapseRowsWhile test news'
376 isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
377 \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_s0) n0) _t1 ->
379 NodeHeader HeaderSection{} -> False
380 _ -> filePos_column bn == filePos_column b0
381 -- NOTE: in case of alignment, HeaderSection is parent
382 (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
383 -- NOTE: merge within old NodePara.
384 (_, NodePara) | isAdjacent -> collapse
387 -- NOTE: new is either on the left or on the right
390 isAdjacent = filePos_line bn - filePos_line eo <= 1
391 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
392 collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
394 -- | Put a 'Root' as a child of the head 'Root'.
396 -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
398 -- NOTE: any NodeText/NodeText merging must have been done before.
399 collapseRoot :: Root -> Rows -> Rows
400 collapseRoot new@(Tree (Sourced ssn@(FileRange _fn bn en:|_sn) n) _ns) rows =
401 debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
404 old@(Tree (Sourced (FileRange fo bo eo:|so) o) os) : olds ->
406 -- NOTE: no child into NodeText
407 (_, NodeText{}) -> collapse2
408 -- NOTE: NodeText can begin a NodePara
409 (NodeText tn, _) | not $ TL.null tn ->
411 -- NOTE: no NodePara within those
412 NodeHeader HeaderEqual{} -> collapse
413 NodeHeader HeaderBar{} -> collapse
414 NodeHeader HeaderDashDash{} -> collapse
415 -- NOTE: NodePara within those
416 NodePara | not isAdjacent -> para
419 -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
420 -- Note that since a NodePara is never adjacent to another,
421 -- it is not nested within another.
425 -- NOTE: no HeaderSection (even adjacent) within a NodePara
426 NodeHeader HeaderSection{} -> collapse2
428 | otherwise -> collapse2
431 isAdjacent = filePos_line bn - filePos_line eo <= 1
432 para = Tree (Sourced ssn NodePara) (return new) : rows
433 collapse = Tree (Sourced (FileRange fo bo en:|so) o) (os |> new) : olds
434 collapse2 = collapseRoot new $ collapseRoot old olds