{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Hdoc.TCT.Tree ( module Hdoc.TCT.Tree , Tree(..), Trees ) where import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ordering(..), Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (undefined, Num(..)) import System.FilePath (FilePath) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import Hdoc.TCT.Utils import Hdoc.TCT.Cell import Hdoc.TCT.Elem import Hdoc.TCT.Debug -- * Type 'Root' -- | A single 'Tree' to gather all the 'Node's -- simplifies greatly the navigation and transformations, -- especially because the later XML or DTC output -- are themselves a single tree-like data structure. -- -- Also, having a single 'Tree' is easier to merge -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' ('Just' "li") ""))@), -- and XML coming from the second parsing phase (eg. @'NodePair' ('PairElem' ('Just' "li") [])@). -- -- For error reporting, indentation sensitivity and paragraph grouping, -- each 'Node' is annotated with a 'Cell' -- spanning over all its content (sub-'Trees' included). type Root = Tree (Cell Node) type Roots = Trees (Cell Node) pattern Tree0 :: a -> Tree a pattern Tree0 a <- Tree a (null -> True) where Tree0 a = Tree a mempty -- * Type 'Node' data Node = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive) | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive) | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's) | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's) | NodeLower !ElemName !ElemAttrs -- ^ node, @ XML node deriving (Eq,Show) instance Pretty Node -- * Type 'Header' data Header = HeaderColon !(Maybe ElemName) !White -- ^ @name: @ | HeaderEqual !ElemName !White -- ^ @name=@ | HeaderBar !(Maybe ElemName) !White -- ^ @name|@ | HeaderGreat !(Maybe ElemName) !White -- ^ @name>@ | HeaderBrackets !TL.Text -- ^ @[name]@ | HeaderDot !TL.Text -- ^ @1. @ | HeaderDash -- ^ @- @ | HeaderDashDash -- ^ @-- @ | HeaderSection !LevelSection -- ^ @# @ | HeaderDotSlash !FilePath -- ^ @./file @ deriving (Eq, Ord, Show) instance Pretty Header {- -- ** Type 'Name' type Name = TL.Text -} -- ** Type 'LevelSection' type LevelSection = Int -- * Type 'Pair' data Pair = PairElem !ElemName !ElemAttrs -- ^ @\text\@ | PairTag Bool -- ^ @\#text\#@ or @~\#text\#@ | PairAt Bool -- ^ @\@text\@@ or @~\@text\@@ | PairStar -- ^ @*text*@ | PairSlash -- ^ @/text/@ | PairUnderscore -- ^ @_value_@ | PairDash -- ^ @-text-@ | PairBackquote -- ^ @`text`@ | PairSinglequote -- ^ @'text'@ | PairDoublequote -- ^ @"text"@ | PairFrenchquote -- ^ @«text»@ | PairParen -- ^ @(text)@ | PairBrace -- ^ @{text}@ | PairBracket -- ^ @[text]@ deriving (Eq,Ord,Show) instance Pretty Pair -- * Type 'Token' data Token = TokenText !TL.Text | TokenEscape !Char -- ^ @\\char@ | TokenLink !Link | TokenAt !Bool !Ref -- ^ @\@foo@ or @~\@foo@ | TokenTag !Bool !Ref -- ^ @\#foo@ or @~\#foo@ deriving (Eq,Show) {- -- * Type 'Filter' data Filter = FilterAnd !Filter !Filter | FilterOr !Filter !Filter | FilterNot !Filter | FilterTag !Ref | FilterAt !Ref deriving (Eq,Ord,Show) -} -- ** Type 'Link' type Link = TL.Text -- ** Type 'Ref' type Ref = TL.Text -- * Type 'Row' -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line. type Row = [Root] -- ** Type 'Rows' -- | In reverse order: a list of nodes in scope -- (hence to which the next line can append to). type Rows = [Root] -- | Having an initial 'Root' simplifies 'mergeRowIndent': -- one can always put the last 'Root' as a child to a previous one. -- This 'Root' just has to be discarded by 'collapseRows'. initRows :: Rows initRows = [Tree0 $ Sourced (FileRange "" p p :| []) $ NodeHeader HeaderDash] where p = FilePos{filePos_line= -1, filePos_column=0} -- NOTE: such that any following 'Root' -- is 'NodePara' if possible, and always a child. -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be. -- -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MAY span over multilines, and they can be many from a single line. -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MUST span only over a single and entire line. -- -- This is the main entry point to build 'Rows' by accumulating 'Row' into them. mergeRow :: Rows -> Row -> Rows mergeRow rows row = debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $ mergeRowPrefix 0 rows $ List.reverse row -- | Merge by considering matching prefixes. -- -- 'HeaderGreat' and 'HeaderBar' work, not on indentation, -- but on their vertical alignment as prefixes. -- Hence, each new 'Row' has those prefixes zipped into a single one -- when they match, are aligned and adjacent. mergeRowPrefix :: ColNum -> Rows -> Row -> Rows mergeRowPrefix col rows row = debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( _new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news , _old@(Tree (Sourced (FileRange _fo _bo eo:|_so) _o) _os):_olds ) -> case collapseRowsWhile isCollapsable rows of [] -> mergeRowIndent rows row head@(unTree -> ch@(Sourced (FileRange _fh bh _eh:|_sh) h)) : olds' -> case (n,h) of -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col -- then check if there is a matching HeaderGreat, -- if so, discard new and restart with a col advanced to new's beginning (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{}) | isAdjacent && isMatching ch -> discard -- NOTE: same for HeaderBar (NodeHeader HeaderBar{}, NodeHeader HeaderBar{}) | isAdjacent && isMatching ch -> discard -- NOTE: collapsing: any other new aligned or on the right of an adjacent head -- makes it collapse entirely (_, NodeHeader HeaderGreat{}) | col < filePos_column bh -> collapse -- NOTE: same for HeaderBar (_, NodeHeader HeaderBar{}) | col < filePos_column bh -> collapse _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row where isAdjacent = filePos_line bn - filePos_line eo <= 1 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (filePos_column bh) rows news collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row where isMatching (Sourced (FileRange _fh bh _eh:|_sh) h) = filePos_column bn == filePos_column bh && n == h isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $ \_t0@(unTree -> c0@(Sourced (FileRange _f0 b0 _e0:|_s0) _n0)) _t1@(unTree -> Sourced (FileRange _f1 b1 e1:|_s1) _n1) -> not (isMatching c0) && (filePos_line b0 - filePos_line e1 <= 1) && -- adjacent col < filePos_column b1 -- righter than col -- | Merge by considering indentation. mergeRowIndent :: Rows -> Row -> Rows mergeRowIndent rows row = debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( new@(Tree (Sourced ssn@(FileRange fn bn en:|sn) n) ns):news ,old@(Tree (Sourced sso@(FileRange fo bo eo:|so) o) os):olds ) -> case debug0 "mergeRowIndent/colNew" (filePos_column bn) `compare` debug0 "mergeRowIndent/colOld" (filePos_column bo) of -- NOTE: new is on the left LT -> case (n,o) of -- NOTE: merge adjacent NodeText -- first -- second (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent && isIndented -> merge $ Tree t (os<>ns) where t = NodeText <$> Sourced (FileRange fo boNew eo:|so) (indent<>to) <> Sourced ssn tn boNew = bo{filePos_column=filePos_column bn} indent = TL.replicate (int64 $ filePos_column bo - filePos_column bn) " " -- | Whether the horizontal delta is made of spaces isIndented = debug0 "mergeRowIndent/isIndented" $ case olds of [] -> True (unTree -> (source -> (fileRange_end -> ep) :| _)) : _ -> case filePos_line ep `compare` filePos_line bo of LT -> True EQ -> filePos_column ep <= filePos_column bn _ -> False _ -> collapse -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: preserve all NodeText "", but still split into two NodePara (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns) -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "mergeRowIndent/lvlNew" lvlNew > debug0 "mergeRowIndent/lvlOld" lvlOld -- # old -- ## new then concat -- ## old or # old -- # new # new else collapse -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec -> mergeRowIndent rows' row where isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $ \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_ss0) n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> filePos_column bn == filePos_column b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> concat -- _ -> replace -- NOTE: new is on the right GT -> case (n,o) of -- NOTE: keep NodeText "" out of old NodePara (NodeText "", NodePara) -> collapse -- NOTE: merge adjacent NodeText (NodeText tn, NodeText to) -> case isAdjacent of _ | TL.null tn || TL.null to , not isVerbatim -> collapse -- old -- new True -> merge $ Tree (NodeText <$> Sourced sso to <> Sourced ssn tn) (os<>ns) -- old -- -- new False -> mergeRowIndent (collapseRoot old olds) (shifted:news) where shifted = Tree (Sourced (FileRange fn bnNew en:|sn) $ NodeText $ indent<>tn) (os<>ns) bnNew = bn{filePos_column=filePos_column bo} indent = TL.replicate (int64 $ filePos_column bn - filePos_column bo) " " -- _ -> concat where isAdjacent = filePos_line bn - filePos_line eo <= 1 -- | Whether a parent semantic want new to stay a NodeText isVerbatim = any p rows where p (unTree -> (unSourced -> NodeHeader HeaderBar{})) = True p _ = False concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news -- | Like 'mergeRowIndent', but without maintaining the appending, -- hence collapsing all the 'Root's of the given 'Rows'. -- -- NOTE: 'initRows' MUST have been the first 'Rows' -- before calling 'mergeRowIndent' on it to get the given 'Rows'. collapseRows :: Rows -> Roots collapseRows rows = debug1_ "collapseRows" ("rows",rows) $ case collapseRowsWhile (\_new _old -> True) rows of [t] -> subTrees t _ -> undefined -- NOTE: subTrees returns the children of the updated initRows -- | Collapse downto any last HeaderSection, returning it and its level. collapseSection :: ColNum -> Rows -> Rows collapseSection col = debug1 "collapseSection" "rows" go where go rows@(new@(unTree -> Sourced (FileRange _fn bn _en:|_sn) n):olds) | col <= filePos_column bn = case n of NodeHeader HeaderSection{} -> rows _ -> collapseSection col $ collapseRoot new $ go olds go _ = mempty collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case [] -> mempty rows@(new@(Tree (Sourced (FileRange _fn bn _en:|_sn) n) _ns):news) -> case news of [] -> rows old@(Tree (Sourced (FileRange _fo bo eo:|_so) o) _os):olds | not $ test new old -> rows | otherwise -> case debug0 "collapseRowsWhile/colNew" (filePos_column bn) `compare` debug0 "collapseRowsWhile/colOld" (filePos_column bo) of -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "collapseRowsWhile/lvlNew" lvlNew > debug0 "collapseRowsWhile/lvlOld" lvlOld -- # old -- ## new then collapse -- ## old or # old -- # new # new else debug "collapseRowsWhile/replace" $ collapseRowsWhile test $ (new:) $ collapseRoot old olds -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news , (unTree -> (unSourced -> NodeHeader HeaderSection{})) <- sec -> collapseRowsWhile test news' where isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $ \_t0@(unTree -> Sourced (FileRange _f0 b0 _e0:|_s0) n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> filePos_column bn == filePos_column b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse -- NOTE: merge within old NodePara. (_, NodePara) | isAdjacent -> collapse -- _ -> collapse2 -- NOTE: new is either on the left or on the right _ -> collapse where isAdjacent = filePos_line bn - filePos_line eo <= 1 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds -- | Put a 'Root' as a child of the head 'Root'. -- -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced. -- -- NOTE: any NodeText/NodeText merging must have been done before. collapseRoot :: Root -> Rows -> Rows collapseRoot new@(Tree (Sourced ssn@(FileRange _fn bn en:|_sn) n) _ns) rows = debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $ case rows of [] -> return new old@(Tree (Sourced (FileRange fo bo eo:|so) o) os) : olds -> case (n,o) of -- NOTE: no child into NodeText (_, NodeText{}) -> collapse2 -- NOTE: NodeText can begin a NodePara (NodeText tn, _) | not $ TL.null tn -> case o of -- NOTE: no NodePara within those NodeHeader HeaderEqual{} -> collapse NodeHeader HeaderBar{} -> collapse NodeHeader HeaderDashDash{} -> collapse -- NOTE: NodePara within those NodePara | not isAdjacent -> para NodeHeader{} -> para _ -> collapse -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara. -- Note that since a NodePara is never adjacent to another, -- it is not nested within another. (_, NodePara) | isAdjacent -> case n of -- NOTE: no HeaderSection (even adjacent) within a NodePara NodeHeader HeaderSection{} -> collapse2 _ -> collapse | otherwise -> collapse2 _ -> collapse where isAdjacent = filePos_line bn - filePos_line eo <= 1 para = Tree (Sourced ssn NodePara) (return new) : rows collapse = Tree (Sourced (FileRange fo bo en:|so) o) (os |> new) : olds collapse2 = collapseRoot new $ collapseRoot old olds