{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Hdoc.TCT.Tree ( module Hdoc.TCT.Tree , Tree(..), Trees ) where import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ordering(..), Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (undefined, Num(..)) import System.FilePath (FilePath) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import Hdoc.TCT.Utils import Hdoc.TCT.Cell import Hdoc.TCT.Elem import Hdoc.TCT.Debug -- * Type 'Root' -- | A single 'Tree' to gather all the 'Node's -- simplifies greatly the navigation and transformations, -- especially because the later XML or DTC output -- are themselves a single tree-like data structure. -- -- Also, having a single 'Tree' is easier to merge -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@), -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@). -- -- For error reporting, indentation sensitivity and paragraph grouping, -- each 'Node' is annotated with a 'Cell' -- spanning over all its content (sub-'Trees' included). type Root = Tree (Cell Node) type Roots = Trees (Cell Node) pattern Tree0 :: a -> Tree a pattern Tree0 a <- Tree a (null -> True) where Tree0 a = Tree a mempty -- * Type 'Node' data Node = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive) | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive) | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's) | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's) | NodeLower !Name !ElemAttrs -- ^ node, @ XML node deriving (Eq,Show) instance Pretty Node -- * Type 'Header' data Header = HeaderColon !Name !White -- ^ @name: @ | HeaderEqual !Name !White -- ^ @name=@ | HeaderBar !Name !White -- ^ @name|@ | HeaderGreat !Name !White -- ^ @name>@ | HeaderBrackets !Name -- ^ @[name]@ | HeaderDot !Name -- ^ @1. @ | HeaderDash -- ^ @- @ | HeaderDashDash -- ^ @-- @ | HeaderSection !LevelSection -- ^ @# @ | HeaderDotSlash !FilePath -- ^ @./file @ deriving (Eq, Ord, Show) instance Pretty Header -- ** Type 'Name' type Name = TL.Text -- ** Type 'LevelSection' type LevelSection = Int -- * Type 'Pair' data Pair = PairElem !ElemName !ElemAttrs -- ^ @\text\@ | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@ | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@ | PairStar -- ^ @*text*@ | PairSlash -- ^ @/text/@ | PairUnderscore -- ^ @_value_@ | PairDash -- ^ @-text-@ | PairBackquote -- ^ @`text`@ | PairSinglequote -- ^ @'text'@ | PairDoublequote -- ^ @"text"@ | PairFrenchquote -- ^ @«text»@ | PairParen -- ^ @(text)@ | PairBrace -- ^ @{text}@ | PairBracket -- ^ @[text]@ deriving (Eq,Ord,Show) instance Pretty Pair -- * Type 'Token' data Token = TokenText !TL.Text | TokenEscape !Char -- ^ @\\char@ | TokenLink !Link | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@ | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@ deriving (Eq,Show) -- ** Type 'Link' type Link = TL.Text -- ** Type 'Ref' type Ref = TL.Text -- * Type 'Row' -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line. type Row = [Root] -- ** Type 'Rows' -- | In reverse order: a list of nodes in scope -- (hence to which the next line can append to). type Rows = [Root] -- | Having an initial 'Root' simplifies 'mergeRowIndent': -- one can always put the last 'Root' as a child to a previous one. -- This 'Root' just has to be discarded by 'collapseRows'. initRows :: Rows initRows = [Tree0 $ Cell (Span "" p p :| []) $ NodeHeader HeaderDash] where p = Pos{pos_line= -1, pos_column=0} -- NOTE: such that any following 'Root' -- is 'NodePara' if possible, and always a child. -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be. -- -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MAY span over multilines, and they can be many from a single line. -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MUST span only over a single and entire line. -- -- This is the main entry point to build 'Rows' by accumulating 'Row' into them. mergeRow :: Rows -> Row -> Rows mergeRow rows row = debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $ mergeRowPrefix 0 rows $ List.reverse row -- | Merge by considering matching prefixes. -- -- 'HeaderGreat' and 'HeaderBar' work, not on indentation, -- but on their vertical alignment as prefixes. -- Hence, each new 'Row' has those prefixes zipped into a single one -- when they match, are aligned and adjacent. mergeRowPrefix :: ColNum -> Rows -> Row -> Rows mergeRowPrefix col rows row = debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( _new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news , _old@(Tree (Cell (Span _fo _bo eo:|_so) _o) _os):_olds ) -> case collapseRowsWhile isCollapsable rows of [] -> mergeRowIndent rows row head@(unTree -> ch@(Cell (Span _fh bh _eh:|_sh) h)) : olds' -> case (n,h) of -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col -- then check if there is a matching HeaderGreat, -- if so, discard new and restart with a col advanced to new's beginning (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{}) | isAdjacent && isMatching ch -> discard -- NOTE: same for HeaderBar (NodeHeader HeaderBar{}, NodeHeader HeaderBar{}) | isAdjacent && isMatching ch -> discard -- NOTE: collapsing: any other new aligned or on the right of an adjacent head -- makes it collapse entirely (_, NodeHeader HeaderGreat{}) | col < pos_column bh -> collapse -- NOTE: same for HeaderBar (_, NodeHeader HeaderBar{}) | col < pos_column bh -> collapse _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row where isAdjacent = pos_line bn - pos_line eo <= 1 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row where isMatching (Cell (Span _fh bh _eh:|_sh) h) = pos_column bn == pos_column bh && n == h isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $ \_t0@(unTree -> c0@(Cell (Span _f0 b0 _e0:|_s0) _n0)) _t1@(unTree -> Cell (Span _f1 b1 e1:|_s1) _n1) -> not (isMatching c0) && (pos_line b0 - pos_line e1 <= 1) && -- adjacent col < pos_column b1 -- righter than col -- | Merge by considering indentation. mergeRowIndent :: Rows -> Row -> Rows mergeRowIndent rows row = debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( new@(Tree (Cell ssn@(Span fn bn en:|sn) n) ns):news ,old@(Tree (Cell sso@(Span fo bo eo:|so) o) os):olds ) -> case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare` debug0 "mergeRowIndent/colOld" (pos_column bo) of -- NOTE: new is on the left LT -> case (n,o) of -- NOTE: merge adjacent NodeText -- first -- second (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent && isIndented -> merge $ Tree t (os<>ns) where t = NodeText <$> Cell (Span fo boNew eo:|so) (indent<>to) <> Cell ssn tn boNew = bo{pos_column=pos_column bn} indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " " -- | Whether the horizontal delta is made of spaces isIndented = debug0 "mergeRowIndent/isIndented" $ case olds of [] -> True (unTree -> (cell_location -> (span_end -> ep) :| _)) : _ -> case pos_line ep `compare` pos_line bo of LT -> True EQ -> pos_column ep <= pos_column bn _ -> False _ -> collapse -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: preserve all NodeText "", but still split into two NodePara (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns) -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "mergeRowIndent/lvlNew" lvlNew > debug0 "mergeRowIndent/lvlOld" lvlOld -- # old -- ## new then concat -- ## old or # old -- # new # new else collapse -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec -> mergeRowIndent rows' row where isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $ \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_ss0) n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> pos_column bn == pos_column b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> concat -- _ -> replace -- NOTE: new is on the right GT -> case (n,o) of -- NOTE: keep NodeText "" out of old NodePara (NodeText "", NodePara) -> collapse -- NOTE: merge adjacent NodeText (NodeText tn, NodeText to) -> case isAdjacent of _ | TL.null tn || TL.null to , not isVerbatim -> collapse -- old -- new True -> merge $ Tree (NodeText <$> Cell sso to <> Cell ssn tn) (os<>ns) -- old -- -- new False -> mergeRowIndent (collapseRoot old olds) (shifted:news) where shifted = Tree (Cell (Span fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns) bnNew = bn{pos_column=pos_column bo} indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " " -- _ -> concat where isAdjacent = pos_line bn - pos_line eo <= 1 -- | Whether a parent semantic want new to stay a NodeText isVerbatim = any p rows where p (unTree -> (unCell -> NodeHeader HeaderBar{})) = True p _ = False concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news -- | Like 'mergeRowIndent', but without maintaining the appending, -- hence collapsing all the 'Root's of the given 'Rows'. -- -- NOTE: 'initRows' MUST have been the first 'Rows' -- before calling 'mergeRowIndent' on it to get the given 'Rows'. collapseRows :: Rows -> Roots collapseRows rows = debug1_ "collapseRows" ("rows",rows) $ case collapseRowsWhile (\_new _old -> True) rows of [t] -> subTrees t _ -> undefined -- NOTE: subTrees returns the children of the updated initRows -- | Collapse downto any last HeaderSection, returning it and its level. collapseSection :: ColNum -> Rows -> Rows collapseSection col = debug1 "collapseSection" "rows" go where go rows@(new@(unTree -> Cell (Span _fn bn _en:|_sn) n):olds) | col <= pos_column bn = case n of NodeHeader HeaderSection{} -> rows _ -> collapseSection col $ collapseRoot new $ go olds go _ = mempty collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case [] -> mempty rows@(new@(Tree (Cell (Span _fn bn _en:|_sn) n) _ns):news) -> case news of [] -> rows old@(Tree (Cell (Span _fo bo eo:|_so) o) _os):olds | not $ test new old -> rows | otherwise -> case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare` debug0 "collapseRowsWhile/colOld" (pos_column bo) of -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "collapseRowsWhile/lvlNew" lvlNew > debug0 "collapseRowsWhile/lvlOld" lvlOld -- # old -- ## new then collapse -- ## old or # old -- # new # new else debug "collapseRowsWhile/replace" $ collapseRowsWhile test $ (new:) $ collapseRoot old olds -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news , (unTree -> (unCell -> NodeHeader HeaderSection{})) <- sec -> collapseRowsWhile test news' where isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $ \_t0@(unTree -> Cell (Span _f0 b0 _e0:|_s0) n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> pos_column bn == pos_column b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse -- NOTE: merge within old NodePara. (_, NodePara) | isAdjacent -> collapse -- _ -> collapse2 -- NOTE: new is either on the left or on the right _ -> collapse where isAdjacent = pos_line bn - pos_line eo <= 1 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds -- | Put a 'Root' as a child of the head 'Root'. -- -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced. -- -- NOTE: any NodeText/NodeText merging must have been done before. collapseRoot :: Root -> Rows -> Rows collapseRoot new@(Tree (Cell ssn@(Span _fn bn en:|_sn) n) _ns) rows = debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $ case rows of [] -> return new old@(Tree (Cell (Span fo bo eo:|so) o) os) : olds -> case (n,o) of -- NOTE: no child into NodeText (_, NodeText{}) -> collapse2 -- NOTE: NodeText can begin a NodePara (NodeText tn, _) | not $ TL.null tn -> case o of -- NOTE: no NodePara within those NodeHeader HeaderEqual{} -> collapse NodeHeader HeaderBar{} -> collapse NodeHeader HeaderDashDash{} -> collapse -- NOTE: NodePara within those NodePara | not isAdjacent -> para NodeHeader{} -> para _ -> collapse -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara. -- Note that since a NodePara is never adjacent to another, -- it is not nested within another. (_, NodePara) | isAdjacent -> case n of -- NOTE: no HeaderSection (even adjacent) within a NodePara NodeHeader HeaderSection{} -> collapse2 _ -> collapse | otherwise -> collapse2 _ -> collapse where isAdjacent = pos_line bn - pos_line eo <= 1 para = Tree (Cell ssn NodePara) (return new) : rows collapse = Tree (Cell (Span fo bo en:|so) o) (os |> new) : olds collapse2 = collapseRoot new $ collapseRoot old olds