import Data.Bool
import Data.Char (Char)
import Data.Eq (Eq(..))
-import Data.Foldable (Foldable(..))
+import Data.Foldable (Foldable(..), any)
import Data.Function (($))
import Data.Functor ((<$>))
import Data.Int (Int)
-import Data.Maybe (Maybe(..))
+import Data.List.NonEmpty (NonEmpty(..))
import Data.Monoid (Monoid(..))
import Data.Ord (Ordering(..), Ord(..))
import Data.Semigroup (Semigroup(..))
-import Data.Sequence ((|>), ViewR(..))
+import Data.Sequence ((|>))
import Data.TreeSeq.Strict (Tree(..), Trees)
import Prelude (undefined, Num(..))
import System.FilePath (FilePath)
-- * Type 'Root'
-- | A single 'Tree' to rule all the 'Node's
--- simplifies the navigation and transformations.
+-- simplifies greatly the navigation and transformations,
+-- especially because the later XML or DTC output
+-- are themselves a single tree-like data structure.
+--
+-- Also, having a single 'Tree' is easier to merge
+-- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@),
+-- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@).
--
-- For error reporting, each 'Node' is annotated with a 'Cell'
-- spanning over all its content (sub-'Trees' included).
| NodeLower !Name !ElemAttrs -- ^ node, @<name a=b@
| NodePara -- ^ node, gather trees by paragraph,
-- useful to know when to generate a <para> XML node
- | NodeGroup -- ^ node, group trees into a single tree,
- -- useful to return many trees when only one is expected
deriving (Eq,Show)
instance Pretty Node
-- * Type 'Pair'
data Pair
- = PairElem !ElemName !ElemAttrs -- ^ @<elem n0=v0 n1=v1>value</elem>@
- | PairHash -- ^ @#value#@
- | PairStar -- ^ @*value*@
- | PairSlash -- ^ @/value/@
+ = PairElem !ElemName !ElemAttrs -- ^ @\<name a0=v0 a1=v1>text\</name>@
+ | PairHash -- ^ @\#text#@
+ | PairStar -- ^ @*text*@
+ | PairSlash -- ^ @/text/@
| PairUnderscore -- ^ @_value_@
- | PairDash -- ^ @-value-@
- | PairBackquote -- ^ @`value`@
- | PairSinglequote -- ^ @'value'@
- | PairDoublequote -- ^ @"value"@
- | PairFrenchquote -- ^ @«value»@
- | PairParen -- ^ @(value)@
- | PairBrace -- ^ @{value}@
- | PairBracket -- ^ @[value]@
+ | PairDash -- ^ @-text-@
+ | PairBackquote -- ^ @`text`@
+ | PairSinglequote -- ^ @'text'@
+ | PairDoublequote -- ^ @"text"@
+ | PairFrenchquote -- ^ @«text»@
+ | PairParen -- ^ @(text)@
+ | PairBrace -- ^ @{text}@
+ | PairBracket -- ^ @[text]@
deriving (Eq,Ord,Show)
instance Pretty Pair
-- (hence to which the next line can append to).
type Rows = [Root]
--- | Having an initial 'Root' simplifies 'appendRow':
+-- | Having an initial 'Root' simplifies 'mergeRowIndent':
-- one can always put the last 'Root' as a child to a previous one.
-- This 'Root' just has to be discarded by 'collapseRows'.
initRows :: Rows
-initRows = [Tree0 (Cell p p NodeGroup)]
- where p = pos1{pos_line= -1, pos_column=0}
+initRows = [Tree0 $ Cell (Span "" p p :| []) $ NodeHeader HeaderDash]
+ where p = Pos{pos_line= -1, pos_column=0}
-- NOTE: such that any following 'Root'
-- is 'NodePara' if possible, and always a child.
--- | @appendRow rows row@ appends @row@ to @rows@.
+-- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be.
+--
+-- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly),
+-- they MAY span over multilines, and they can be many from a single line.
+-- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly),
+-- they MUST span only over a single and entire line.
+--
+-- This is the main entry point to build 'Rows' by accumulating 'Row' into them.
+mergeRow :: Rows -> Row -> Rows
+mergeRow rows row =
+ debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $
+ mergeRowPrefix 0 rows $ List.reverse row
+
+-- | Merge by considering matching prefixes.
--
--- [@rows@] parent 'Rows', from closest to farthest (non-strictly descending)
--- [@row@] next 'Row', from leftest column to rightest (non-stricly ascending)
-appendRow :: Rows -> Row -> Rows
-appendRow rows row =
- debug2_ "appendRow" ("news",row) ("olds",rows) $
+-- 'HeaderGreat' and 'HeaderBar' work, not on indentation,
+-- but on their vertical alignment as prefixes.
+-- Hence, each new 'Row' has those prefixes zipped into a single one
+-- when they match, are aligned and adjacent.
+mergeRowPrefix :: ColNum -> Rows -> Row -> Rows
+mergeRowPrefix col rows row =
+ debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $
case (row,rows) of
+ ([], _) -> rows
(_, []) -> undefined -- NOTE: cannot happen with initRows
+ ( _new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news
+ , _old@(Tree (Cell (Span _fo _bo eo:|_so) _o) _os):_olds ) ->
+ case collapseRowsWhile isCollapsable rows of
+ [] -> mergeRowIndent rows row
+ head@(unTree -> ch@(Cell (Span _fh bh _eh:|_sh) h)) : olds' ->
+ case (n,h) of
+ -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col
+ -- then check if there is a matching HeaderGreat,
+ -- if so, discard new and restart with a col advanced to new's beginning
+ (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{})
+ | isAdjacent && isMatching ch -> discard
+ -- NOTE: same for HeaderBar
+ (NodeHeader HeaderBar{}, NodeHeader HeaderBar{})
+ | isAdjacent && isMatching ch -> discard
+ -- NOTE: collapsing: any other new aligned or on the right of an adjacent head
+ -- makes it collapse entirely
+ (_, NodeHeader HeaderGreat{})
+ | col < pos_column bh -> collapse
+ -- NOTE: same for HeaderBar
+ (_, NodeHeader HeaderBar{})
+ | col < pos_column bh -> collapse
+ _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row
+ where
+ isAdjacent = pos_line bn - pos_line eo <= 1
+ discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news
+ collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row
+ where
+ isMatching (Cell (Span _fh bh _eh:|_sh) h) =
+ pos_column bn == pos_column bh &&
+ n == h
+ isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $
+ \_t0@(unTree -> c0@(Cell (Span _f0 b0 _e0:|_s0) _n0))
+ _t1@(unTree -> Cell (Span _f1 b1 e1:|_s1) _n1) ->
+ not (isMatching c0) &&
+ (pos_line b0 - pos_line e1 <= 1) && -- adjacent
+ col < pos_column b1 -- righter than col
+
+-- | Merge by considering indentation.
+mergeRowIndent :: Rows -> Row -> Rows
+mergeRowIndent rows row =
+ debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $
+ case (row,rows) of
([], _) -> rows
- (new@(Tree (Cell bn en n) ns):news, old@(Tree (Cell bo eo o) os):olds) ->
- case debug0 "appendRow/colNew" (pos_column bn) `compare`
- debug0 "appendRow/colOld" (pos_column bo) of
- -- NOTE: new is vertically lower
+ (_, []) -> undefined -- NOTE: cannot happen with initRows
+ ( new@(Tree (Cell ssn@(Span fn bn en:|sn) n) ns):news
+ ,old@(Tree (Cell sso@(Span fo bo eo:|so) o) os):olds ) ->
+ case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare`
+ debug0 "mergeRowIndent/colOld" (pos_column bo) of
+ -- NOTE: new is on the left
LT ->
case (n,o) of
-- NOTE: merge adjacent NodeText
-- first
-- second
(NodeText tn, NodeText to)
- | TL.null tn || TL.null to -> child
- | not isNewPara && isIndented -> merge $ Tree t (os<>ns)
+ | TL.null tn || TL.null to
+ , not isVerbatim -> collapse
+ | isAdjacent && isIndented -> merge $ Tree t (os<>ns)
where
- t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn
+ t = NodeText <$> Cell (Span fo boNew eo:|so) (indent<>to) <> Cell ssn tn
boNew = bo{pos_column=pos_column bn}
indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " "
- -- | Whether the horizontal diff is made of spaces
+ -- | Whether the horizontal delta is made of spaces
isIndented =
- debug0 "appendRow/isIndented" $
+ debug0 "mergeRowIndent/isIndented" $
case olds of
[] -> True
- (unTree -> cell_end -> ep) : _ ->
+ (unTree -> (cell_spans -> (span_end -> ep) :| _)) : _ ->
case pos_line ep `compare` pos_line bo of
LT -> True
EQ -> pos_column ep <= pos_column bn
_ -> False
- _ -> child
+ _ -> collapse
-- NOTE: new is vertically aligned
EQ ->
case (n,o) of
-- NOTE: preserve all NodeText "", but still split into two NodePara
(NodeText tn, NodeText to)
- | TL.null tn || TL.null to -> child
- | not isNewPara -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
+ | TL.null tn || TL.null to
+ , not isVerbatim -> collapse
+ | isAdjacent -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns)
-- NOTE: HeaderSection can parent Nodes at the same level
- (NodeHeader (HeaderSection lvlNew), _)
- | Just (lvlOld, rows'@(old':olds')) <- collapseSection (pos_column bn) rows ->
- if debug0 "appendRow/lvlNew" lvlNew
- > debug0 "appendRow/lvlOld" lvlOld
- then -- # old
- -- ## new
- {-concat-} List.reverse row <> rows'
- else -- ## old or # old
- -- # new # new
- {-child old'-} appendRow (appendChild old' olds') row
- -- NOTE: concat everything else following a HeaderSection.
+ (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
+ if debug0 "mergeRowIndent/lvlNew" lvlNew
+ > debug0 "mergeRowIndent/lvlOld" lvlOld
+ -- # old
+ -- ## new
+ then concat
+ -- ## old or # old
+ -- # new # new
+ else collapse
+ -- NOTE: old is no HeaderSection, then collapse to any older and loop
+ (NodeHeader HeaderSection{}, _)
+ | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows
+ , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
+ mergeRowIndent rows' row
+ where
+ isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $
+ \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_ss0) n0) _t1 ->
+ case n0 of
+ NodeHeader HeaderSection{} -> False
+ _ -> pos_column bn == pos_column b0
+ -- NOTE: in case of alignment, HeaderSection is parent
(_, NodeHeader HeaderSection{}) -> concat
- {-
- (NodeHeader ho@HeaderGreat{}, NodeHeader hn) | ho == hn ->
- debug "appendRow/HeaderGreat" $ appendRow rows news
- -}
--
_ -> replace
- -- NOTE: new is vertically greater
+ -- NOTE: new is on the right
GT ->
case (n,o) of
-- NOTE: keep NodeText "" out of old NodePara
- (NodeText "", NodePara) -> child
+ (NodeText "", NodePara) -> collapse
-- NOTE: merge adjacent NodeText
(NodeText tn, NodeText to) ->
- case isNewPara of
- _ | TL.null tn || TL.null to -> child
+ case isAdjacent of
+ _ | TL.null tn || TL.null to
+ , not isVerbatim -> collapse
+ -- old
+ -- new
+ True -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns)
-- old
--
-- new
- True -> appendRow (appendChild old olds) (shifted:news)
+ False -> mergeRowIndent (collapseRoot old olds) (shifted:news)
where
- shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns)
+ shifted = Tree (Cell (Span fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns)
bnNew = bn{pos_column=pos_column bo}
indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " "
- -- old
- -- new
- False -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns)
--
_ -> concat
where
- isNewPara = pos_line bn - pos_line eo > 1
- concat = debug "appendRow/concat" $ List.reverse row <> rows
- merge m = debug "appendRow/merge" $ appendRow (m : olds) news
- child = debug "appendRow/child" $ appendRow (appendChild old olds) row
- replace = debug "appendRow/replace" $ appendRow (new : appendChild old olds) news
+ isAdjacent = pos_line bn - pos_line eo <= 1
+ -- | Whether a parent semantic want new to stay a NodeText
+ isVerbatim = any p rows
+ where
+ p (unTree -> (unCell -> NodeHeader HeaderBar{})) = True
+ p _ = False
+ concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows
+ merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news
+ collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row
+ replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news
+
+-- | Like 'mergeRowIndent', but without maintaining the appending,
+-- hence collapsing all the 'Root's of the given 'Rows'.
+--
+-- NOTE: 'initRows' MUST have been the first 'Rows'
+-- before calling 'mergeRowIndent' on it to get the given 'Rows'.
+collapseRows :: Rows -> Roots
+collapseRows rows =
+ debug1_ "collapseRows" ("rows",rows) $
+ case collapseRowsWhile (\_new _old -> True) rows of
+ [t] -> subTrees t
+ _ -> undefined
+ -- NOTE: subTrees returns the children of the updated initRows
-- | Collapse downto any last HeaderSection, returning it and its level.
-collapseSection :: ColNum -> Rows -> Maybe (LevelSection,Rows)
+collapseSection :: ColNum -> Rows -> Rows
collapseSection col = debug1 "collapseSection" "rows" go
where
- go rows@(new@(unTree -> Cell bn _en n):olds)
- | col == pos_column bn =
+ go rows@(new@(unTree -> Cell (Span _fn bn _en:|_sn) n):olds)
+ | col <= pos_column bn =
case n of
- NodeHeader (HeaderSection lvl) -> return (lvl, rows)
- _ -> (appendChild new <$>) <$> go olds
- go _ = Nothing
+ NodeHeader HeaderSection{} -> rows
+ _ -> collapseSection col $ collapseRoot new $ go olds
+ go _ = mempty
--- | Like 'appendRow', but without maintaining the appending,
--- hence collapsing all the 'Root's of the given 'Rows'.
---
--- NOTE: 'initRows' MUST have been the first 'Rows'
--- before calling 'appendRow' on it to get the given 'Rows'.
-collapseRows :: Rows -> Roots
-collapseRows = debug1 "collapseRows" "rows" $ \case
+collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows
+collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case
[] -> mempty
- new@(Tree (Cell bn _en n) _ns):olds ->
- case olds of
- [] -> subTrees new
- old@(Tree (Cell bo eo o) _os):oldss ->
- case debug0 "colNew" (pos_column bn) `compare`
- debug0 "colOld" (pos_column bo) of
+ rows@(new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news) ->
+ case news of
+ [] -> rows
+ old@(Tree (Cell (Span _fo bo eo:|_so) o) _os):olds
+ | not $ test new old -> rows
+ | otherwise ->
+ case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare`
+ debug0 "collapseRowsWhile/colOld" (pos_column bo) of
-- NOTE: new is vertically aligned
EQ ->
case (n,o) of
- (NodeHeader (HeaderSection lvlNew), _)
- | Just (lvlOld, old':olds') <- collapseSection (pos_column bn) olds ->
- if debug0 "collapseRows/lvlNew" lvlNew
- > debug0 "collapseRows/lvlOld" lvlOld
- then -- # old
- -- ## new
- {-child new-} collapseRows $ appendChild new $ old':olds'
- else -- ## old or # old
- -- # new # new
- {-child old'-} collapseRows $ new:appendChild old' olds'
- -- NOTE: in case of alignment, HeaderSection is parent.
- (_, NodeHeader HeaderSection{}) -> child
+ -- NOTE: HeaderSection can parent Nodes at the same level
+ (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) ->
+ if debug0 "collapseRowsWhile/lvlNew" lvlNew
+ > debug0 "collapseRowsWhile/lvlOld" lvlOld
+ -- # old
+ -- ## new
+ then collapse
+ -- ## old or # old
+ -- # new # new
+ else
+ debug "collapseRowsWhile/replace" $
+ collapseRowsWhile test $ (new:) $ collapseRoot old olds
+ -- NOTE: old is no HeaderSection, then collapse to any older and loop
+ (NodeHeader HeaderSection{}, _)
+ | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news
+ , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec ->
+ collapseRowsWhile test news'
+ where
+ isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $
+ \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_s0) n0) _t1 ->
+ case n0 of
+ NodeHeader HeaderSection{} -> False
+ _ -> pos_column bn == pos_column b0
+ -- NOTE: in case of alignment, HeaderSection is parent
+ (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse
-- NOTE: merge within old NodePara.
- (_, NodePara{}) | not isNewPara -> child
+ (_, NodePara) | isAdjacent -> collapse
--
- _ -> child2
- -- NOTE: new is either vertically lower or greater
- _ -> child
+ _ -> collapse2
+ -- NOTE: new is either on the left or on the right
+ _ -> collapse
where
- isNewPara = pos_line bn - pos_line eo > 1
- child, child2 :: Roots
- child = debug "collapseRows/child" $ collapseRows $ appendChild new olds
- child2 = debug "collapseRows/child2" $ collapseRows $ appendChild new $ appendChild old oldss
+ isAdjacent = pos_line bn - pos_line eo <= 1
+ collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news
+ collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds
-- | Put a 'Root' as a child of the head 'Root'.
--
--- NOTE: 'appendChild' is where 'NodePara' may be introduced.
+-- NOTE: 'collapseRoot' is where 'NodePara' may be introduced.
+--
-- NOTE: any NodeText/NodeText merging must have been done before.
-appendChild :: Root -> Rows -> Rows
-appendChild new@(Tree (Cell bn en n) _ns) rows =
- debug2_ "appendChild" ("new",Seq.singleton new) ("rows",rows) $
+collapseRoot :: Root -> Rows -> Rows
+collapseRoot new@(Tree (Cell ssn@(Span _fn bn en:|_sn) n) _ns) rows =
+ debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $
case rows of
[] -> return new
- old@(Tree (Cell bo eo o) os) : olds ->
+ old@(Tree (Cell (Span fo bo eo:|so) o) os) : olds ->
case (n,o) of
- -- NOTE: never put a NodePara directly within another
- (NodePara, NodePara) -> child2
- -- NOTE: never put a child to NodeText
- (_, NodeText{}) -> child2
+ -- NOTE: no child into NodeText
+ (_, NodeText{}) -> collapse2
-- NOTE: NodeText can begin a NodePara
(NodeText tn, _) | not $ TL.null tn ->
case o of
-- NOTE: no NodePara within those
- NodeHeader HeaderEqual{} -> child
- NodeHeader HeaderBar{} -> child
- NodeHeader HeaderDashDash{} -> child
+ NodeHeader HeaderEqual{} -> collapse
+ NodeHeader HeaderBar{} -> collapse
+ NodeHeader HeaderDashDash{} -> collapse
-- NOTE: NodePara within those
- NodePara | isNewPara -> para
+ NodePara | not isAdjacent -> para
NodeHeader{} -> para
- NodeGroup -> para
- _ -> child
- _ -> child
+ _ -> collapse
+ -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara.
+ -- Note that since a NodePara is never adjacent to another,
+ -- it is not nested within another.
+ (_, NodePara)
+ | isAdjacent ->
+ case n of
+ -- NOTE: no HeaderSection (even adjacent) within a NodePara
+ NodeHeader HeaderSection{} -> collapse2
+ _ -> collapse
+ | otherwise -> collapse2
+ _ -> collapse
where
- isNewPara = pos_line bn - pos_line eo > 1
- child = Tree (Cell bo en o) (os |> new) : olds
- child2 = appendChild new $ appendChild old olds
- para = Tree (Cell bn en NodePara) (return new) : rows
-
--- | Return a 'Tree' from a 'Cell' node and 'subTrees',
--- while adjusting the node's 'cell_end'
--- with the last 'Tree' of the 'subTrees'.
-tree :: Cell a -> Trees (Cell a) -> Tree (Cell a)
-tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts
- where
- ep' = case Seq.viewr ts of
- EmptyR -> ep
- _ :> (unTree -> cell_end -> p) -> p
+ isAdjacent = pos_line bn - pos_line eo <= 1
+ para = Tree (Cell ssn NodePara) (return new) : rows
+ collapse = Tree (Cell (Span fo bo en:|so) o) (os |> new) : olds
+ collapse2 = collapseRoot new $ collapseRoot old olds