{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Hdoc.TCT.Tree ( module Hdoc.TCT.Tree , Tree(..), Trees ) where import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ordering(..), Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (undefined, Num(..), Bounded(..)) import System.FilePath (FilePath) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Text.Megaparsec as P import Hdoc.TCT.Utils import Hdoc.TCT.Cell import Hdoc.TCT.Elem import Hdoc.TCT.Debug -- * Type 'Root' -- | A single 'Tree' to gather all the 'Node's -- simplifies greatly the navigation and transformations, -- especially because the later XML or DTC output -- are themselves a single tree-like data structure. -- -- Also, having a single 'Tree' is easier to merge -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' ('Just' "li") ""))@), -- and XML coming from the second parsing phase (eg. @'NodePair' ('PairElem' ('Just' "li") [])@). -- -- For error reporting, indentation sensitivity and paragraph grouping, -- each 'Node' is annotated with a 'Cell' -- spanning over all its content (sub-'Trees' included). type Root = Tree (Cell Node) type Roots = Trees (Cell Node) pattern Tree0 :: a -> Tree a pattern Tree0 a <- Tree a (null -> True) where Tree0 a = Tree a mempty -- * Type 'Node' data Node = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive) | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive) | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's) | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's) | NodeLower !ElemName !ElemAttrs -- ^ node, @ XML node deriving (Eq,Show) instance Pretty Node -- * Type 'Header' data Header = HeaderColon !(Maybe ElemName) !White -- ^ @name: @ | HeaderEqual !ElemName !White -- ^ @name=@ | HeaderBar !(Maybe ElemName) !White -- ^ @name|@ | HeaderGreat !(Maybe ElemName) !White -- ^ @name>@ | HeaderBrackets !TL.Text -- ^ @[name]@ | HeaderDot !TL.Text -- ^ @1. @ | HeaderDash -- ^ @- @ | HeaderDashDash -- ^ @-- @ | HeaderSection !LevelSection -- ^ @# @ | HeaderDotSlash !FilePath -- ^ @./file @ deriving (Eq, Ord, Show) instance Pretty Header {- -- ** Type 'Name' type Name = TL.Text -} -- ** Type 'LevelSection' type LevelSection = Int -- * Type 'Pair' data Pair = PairElem !ElemName !ElemAttrs -- ^ @\text\@ | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@ | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@ | PairStar -- ^ @*text*@ | PairSlash -- ^ @/text/@ | PairUnderscore -- ^ @_value_@ | PairDash -- ^ @-text-@ | PairBackquote -- ^ @`text`@ | PairSinglequote -- ^ @'text'@ | PairDoublequote -- ^ @"text"@ | PairFrenchquote -- ^ @«text»@ | PairParen -- ^ @(text)@ | PairBrace -- ^ @{text}@ | PairBracket -- ^ @[text]@ deriving (Eq,Ord,Show) instance Pretty Pair -- * Type 'Token' data Token = TokenText !TL.Text | TokenEscape !Char -- ^ @\\char@ | TokenLink !Link | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@ | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@ deriving (Eq,Show) {- -- * Type 'Filter' data Filter = FilterAnd !Filter !Filter | FilterOr !Filter !Filter | FilterNot !Filter | FilterTag !Ref | FilterAt !Ref deriving (Eq,Ord,Show) -} -- ** Type 'Link' type Link = TL.Text -- ** Type 'Ref' type Ref = TL.Text -- * Type 'Row' -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line. type Row = [Root] -- ** Type 'Rows' -- | In reverse order: a list of nodes in scope -- (hence to which the next line can append to). type Rows = [Root] -- | Having an initial 'Root' simplifies 'mergeRowIndent': -- one can always put the last 'Root' as a child to a previous one. -- This 'Root' just has to be discarded by 'collapseRows'. initRows :: Rows initRows = [Tree0 $ Sourced (FileRange "" p p :| []) $ NodeHeader HeaderDash] where p = LineColumn{lineNum=numM1, colNum=num0} -- NOTE: lineNum= -1 is a hack so that any following 'Root' -- becomes a 'NodePara' if possible, and always a child. numM1 = numMax<>numMax<>P.pos1 num0 = numM1<>P.pos1 numMax = P.mkPos maxBound -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be. -- -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MAY span over multilines, and they can be many from a single line. -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MUST span only over a single and entire line. -- -- This is the main entry point to build 'Rows' by accumulating 'Row' into them. mergeRow :: Rows -> Row -> Rows mergeRow rows row = debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $ mergeRowPrefix 0 rows $ List.reverse row -- | Merge by considering matching prefixes. -- -- 'HeaderGreat' and 'HeaderBar' work, not on indentation, -- but on their vertical alignment as prefixes. -- Hence, each new 'Row' has those prefixes zipped into a single one -- when they match, are aligned and adjacent. mergeRowPrefix :: ColInt -> Rows -> Row -> Rows mergeRowPrefix col rows row = debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( _new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news , _old@(Tree (Sourced (FileRange _fo _bo eo:|_so) _o) _os):_olds ) -> case collapseRowsWhile isCollapsable rows of [] -> mergeRowIndent rows row head@(unTree -> ch@(Sourced (FileRange _fh bh _eh:|_sh) h)) : olds' -> case (n,h) of -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col -- then check if there is a matching HeaderGreat, -- if so, discard new and restart with a col advanced to new's beginning (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{}) | isAdjacent && isMatching ch -> discard -- NOTE: same for HeaderBar (NodeHeader HeaderBar{}, NodeHeader HeaderBar{}) | isAdjacent && isMatching ch -> discard -- NOTE: collapsing: any other new aligned or on the right of an adjacent head -- makes it collapse entirely (_, NodeHeader HeaderGreat{}) | col < colInt bh -> collapse -- NOTE: same for HeaderBar (_, NodeHeader HeaderBar{}) | col < colInt bh -> collapse _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row where isAdjacent = lineInt bn - lineInt eo <= 1 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (colInt bh) rows news collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row where isMatching (Sourced (FileRange _fh bh _eh:|_sh) h) = colInt bn == colInt bh && n == h isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $ \_t0@(unTree -> c0@(Sourced (FileRange _f0 b0 _e0:|_s0) _n0)) _t1@(unTree -> Sourced (FileRange _f1 b1 e1:|_s1) _n1) -> not (isMatching c0) && (lineInt b0 - lineInt e1 <= 1) && -- adjacent col < colInt b1 -- righter than col -- | Merge by considering indentation. mergeRowIndent :: Rows -> Row -> Rows mergeRowIndent rows row = debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( new@(Tree (Sourced ssn@(FileRange fn bn en:|sn) n) ns):news ,old@(Tree (Sourced sso@(FileRange fo bo eo:|so) o) os):olds ) -> case debug0 "mergeRowIndent/colNew" (colInt bn) `compare` debug0 "mergeRowIndent/colOld" (colInt bo) of -- NOTE: new is on the left LT -> case (n,o) of -- NOTE: merge adjacent NodeText -- first -- second (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent && isIndented -> merge $ Tree t (os<>ns) where t = NodeText <$> Sourced (FileRange fo boNew eo:|so) (indent<>to) <> Sourced ssn tn boNew = bo{colNum=colNum bn} indent = TL.replicate (int64 $ colInt bo - colInt bn) " " -- | Whether the horizontal delta is made of spaces isIndented = debug0 "mergeRowIndent/isIndented" $ case olds of [] -> True (unTree -> (source -> (fileRange_end -> ep) :| _)) : _ -> case lineInt ep `compare` lineInt bo of LT -> True EQ -> colInt ep <= colInt bn _ -> False _ -> collapse -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: preserve all NodeText "", but still split into two NodePara (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns) -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "mergeRowIndent/lvlNew" lvlNew > debug0 "mergeRowIndent/lvlOld" lvlOld -- # old -- ## new then concat -- ## old or # old -- # new # new else collapse -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec -> mergeRowIndent rows' row where isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $ \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_ss0) n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> colInt bn == colInt b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> concat -- _ -> replace -- NOTE: new is on the right GT -> case (n,o) of -- NOTE: keep NodeText "" out of old NodePara (NodeText "", NodePara) -> collapse -- NOTE: merge adjacent NodeText (NodeText tn, NodeText to) -> case isAdjacent of _ | TL.null tn || TL.null to , not isVerbatim -> collapse -- old -- new True -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns) -- old -- -- new False -> mergeRowIndent (collapseRoot old olds) (shifted:news) where shifted = Tree (Sourced (FileRange fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns) bnNew = bn{colNum=colNum bo} indent = TL.replicate (int64 $ colInt bn - colInt bo) " " -- _ -> concat where isAdjacent = lineInt bn - lineInt eo <= 1 -- | Whether a parent semantic want new to stay a NodeText isVerbatim = any p rows where p (unTree -> (unSourced -> NodeHeader HeaderBar{})) = True p _ = False concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news -- | Like 'mergeRowIndent', but without maintaining the appending, -- hence collapsing all the 'Root's of the given 'Rows'. -- -- NOTE: 'initRows' MUST have been the first 'Rows' -- before calling 'mergeRowIndent' on it to get the given 'Rows'. collapseRows :: Rows -> Roots collapseRows rows = debug1_ "collapseRows" ("rows",rows) $ case collapseRowsWhile (\_new _old -> True) rows of [t] -> subTrees t _ -> undefined -- NOTE: subTrees returns the children of the updated initRows -- | Collapse downto any last HeaderSection, returning it and its level. collapseSection :: ColInt -> Rows -> Rows collapseSection col = debug1 "collapseSection" "rows" go where go rows@(new@(unTree -> Sourced (FileRange _fn bn _en:|_sn) n):olds) | col <= colInt bn = case n of NodeHeader HeaderSection{} -> rows _ -> collapseSection col $ collapseRoot new $ go olds go _ = mempty collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case [] -> mempty rows@(new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news) -> case news of [] -> rows old@(Tree (Sourced (FileRange _fo bo eo:|_so) o) _os):olds | not $ test new old -> rows | otherwise -> case debug0 "collapseRowsWhile/colNew" (colInt bn) `compare` debug0 "collapseRowsWhile/colOld" (colInt bo) of -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "collapseRowsWhile/lvlNew" lvlNew > debug0 "collapseRowsWhile/lvlOld" lvlOld -- # old -- ## new then collapse -- ## old or # old -- # new # new else debug "collapseRowsWhile/replace" $ collapseRowsWhile test $ (new:) $ collapseRoot old olds -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec -> collapseRowsWhile test news' where isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $ \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_s0) n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> colInt bn == colInt b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse -- NOTE: merge within old NodePara. (_, NodePara) | isAdjacent -> collapse -- _ -> collapse2 -- NOTE: new is either on the left or on the right _ -> collapse where isAdjacent = lineInt bn - lineInt eo <= 1 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds -- | Put a 'Root' as a child of the head 'Root'. -- -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced. -- -- NOTE: any NodeText/NodeText merging must have been done before. collapseRoot :: Root -> Rows -> Rows collapseRoot new@(Tree (Sourced ssn@(FileRange _fn bn en:|_sn) n) _ns) rows = debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $ case rows of [] -> return new old@(Tree (Sourced (FileRange fo bo eo:|so) o) os) : olds -> case (n,o) of -- NOTE: no child into NodeText (_, NodeText{}) -> collapse2 -- NOTE: NodeText can begin a NodePara (NodeText tn, _) | not $ TL.null tn -> case o of -- NOTE: no NodePara within those NodeHeader HeaderEqual{} -> collapse NodeHeader HeaderBar{} -> collapse NodeHeader HeaderDashDash{} -> collapse -- NOTE: NodePara within those NodePara | not isAdjacent -> para NodeHeader{} -> para _ -> collapse -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara. -- Note that since a NodePara is never adjacent to another, -- it is not nested within another. (_, NodePara) | isAdjacent -> case n of -- NOTE: no HeaderSection (even adjacent) within a NodePara NodeHeader HeaderSection{} -> collapse2 _ -> collapse | otherwise -> collapse2 _ -> collapse where isAdjacent = lineInt bn - lineInt eo <= 1 para = Tree (Sourced ssn NodePara) (return new) : rows collapse = Tree (Sourced (FileRange fo bo en:|so) o) (os |> new) : olds collapse2 = collapseRoot new $ collapseRoot old olds