{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Language.TCT.Tree ( module Language.TCT.Tree , Tree(..), Trees ) where import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.Monoid (Monoid(..)) import Data.Ord (Ordering(..), Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (undefined, Num(..)) import System.FilePath (FilePath) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import Language.TCT.Utils import Language.TCT.Cell import Language.TCT.Elem import Language.TCT.Debug -- * Type 'Root' -- | A single 'Tree' to rule all the 'Node's -- simplifies greatly the navigation and transformations, -- especially because the later XML or DTC output -- are themselves a single tree-like data structure. -- -- Also, having a single 'Tree' is easier to merge -- XML coming from the first parsing phase (eg. @('NodeHeader' ('HeaderEqual' "li" ""))@), -- and XML coming from the second parsing phase (eg. @NodePair (PairElem "li" [])@). -- -- For error reporting, each 'Node' is annotated with a 'Cell' -- spanning over all its content (sub-'Trees' included). type Root = Tree (Cell Node) type Roots = Trees (Cell Node) pattern Tree0 :: a -> Tree a pattern Tree0 a <- Tree a (null -> True) where Tree0 a = Tree a mempty -- * Type 'Node' data Node = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive) | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive) | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's) | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's) | NodeLower !Name !ElemAttrs -- ^ node, @ XML node | NodeGroup -- ^ node, group trees into a single tree, -- useful to return many trees when only one is expected deriving (Eq,Show) instance Pretty Node -- * Type 'Header' data Header = HeaderColon !Name !White -- ^ @name: @ | HeaderEqual !Name !White -- ^ @name=@ | HeaderBar !Name !White -- ^ @name|@ | HeaderGreat !Name !White -- ^ @name>@ | HeaderBrackets !Name -- ^ @[name]@ | HeaderDot !Name -- ^ @1. @ | HeaderDash -- ^ @- @ | HeaderDashDash -- ^ @-- @ | HeaderSection !LevelSection -- ^ @# @ | HeaderDotSlash !FilePath -- ^ @./file @ deriving (Eq, Ord, Show) instance Pretty Header -- ** Type 'Name' type Name = TL.Text -- ** Type 'LevelSection' type LevelSection = Int -- * Type 'Pair' data Pair = PairElem !ElemName !ElemAttrs -- ^ @value@ | PairHash -- ^ @#value#@ | PairStar -- ^ @*value*@ | PairSlash -- ^ @/value/@ | PairUnderscore -- ^ @_value_@ | PairDash -- ^ @-value-@ | PairBackquote -- ^ @`value`@ | PairSinglequote -- ^ @'value'@ | PairDoublequote -- ^ @"value"@ | PairFrenchquote -- ^ @«value»@ | PairParen -- ^ @(value)@ | PairBrace -- ^ @{value}@ | PairBracket -- ^ @[value]@ deriving (Eq,Ord,Show) instance Pretty Pair -- * Type 'Token' data Token = TokenText !TL.Text | TokenEscape !Char | TokenLink !Link | TokenTag !Tag deriving (Eq,Show) -- ** Type 'Tag' type Tag = TL.Text -- ** Type 'Link' type Link = TL.Text -- * Type 'Row' -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line. type Row = [Root] -- ** Type 'Rows' -- | In reverse order: a list of nodes in scope -- (hence to which the next line can append to). type Rows = [Root] -- | Having an initial 'Root' simplifies 'mergeRowIndent': -- one can always put the last 'Root' as a child to a previous one. -- This 'Root' just has to be discarded by 'collapseRows'. initRows :: Rows initRows = [Tree0 (Cell p p NodeGroup)] where p = pos1{pos_line= -1, pos_column=0} -- NOTE: such that any following 'Root' -- is 'NodePara' if possible, and always a child. -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be. -- -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MAY span over multilines, and they can be many from a single line. -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MUST span only over a single and entire line. -- -- This is the main entry point to build 'Rows' by accumulating 'Row' into them. mergeRow :: Rows -> Row -> Rows mergeRow rows row = debug2_ "mergeRow" ("news",List.reverse row) ("olds",rows) $ mergeRowPrefix 0 rows $ List.reverse row -- | Merge by considering matching prefixes. -- -- 'HeaderGreat' and 'HeaderBar' work, not on indentation, -- but on their vertical alignment as prefixes. -- Hence, each new 'Row' has those prefixes zipped into a single one -- when they match, are aligned and adjacent. mergeRowPrefix :: ColNum -> Rows -> Row -> Rows mergeRowPrefix col rows row = debug3_ "mergeRowPrefix" ("col",col) ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( _new@(Tree (Cell bn _en n) _ns):news , _old@(Tree (Cell _bo eo _o) _os):_olds ) -> case collapseRowsWhile isCollapsable rows of [] -> mergeRowIndent rows row head@(unTree -> ch@(Cell bh _eh h)) : olds' -> case (n,h) of -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col -- then check if there is a matching HeaderGreat, -- if so, discard new and restart with a col advanced to new's beginning (NodeHeader HeaderGreat{}, NodeHeader HeaderGreat{}) | isAdjacent && isMatching ch -> discard {- | pos_column bn == pos_column bh , isAdjacent , hn == hh -} -- NOTE: same for HeaderBar (NodeHeader HeaderBar{}, NodeHeader HeaderBar{}) | isAdjacent && isMatching ch -> discard -- NOTE: collapsing: any other new aligned or on the right of an adjacent head -- makes it collapse entirely (_, NodeHeader HeaderGreat{}) | col < pos_column bh -> collapse -- NOTE: same for HeaderBar (_, NodeHeader HeaderBar{}) | col < pos_column bh -> collapse _ -> debug "mergeRowPrefix/indent" $ mergeRowIndent rows row where isAdjacent = pos_line bn - pos_line eo <= 1 discard = debug "mergeRowPrefix/discard" $ mergeRowPrefix (pos_column bh) rows news collapse = debug "mergeRowPrefix/collapse" $ mergeRowPrefix col (collapseRoot head olds') row where isMatching (Cell bh _eh h) = pos_column bn == pos_column bh && n == h isCollapsable = debug2 "mergeRowPrefix/isCollapsable" "new" "old" $ \_t0@(unTree -> c0@(Cell b0 _e0 _n0)) _t1@(unTree -> Cell b1 e1 _n1) -> not (isMatching c0) && (pos_line b0 - pos_line e1 <= 1) && -- adjacent col < pos_column b1 -- righter than col -- | Merge by considering indentation. mergeRowIndent :: Rows -> Row -> Rows mergeRowIndent rows row = debug2_ "mergeRowIndent" ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( new@(Tree (Cell bn en n) ns):news ,old@(Tree (Cell bo eo o) os):olds ) -> case debug0 "mergeRowIndent/colNew" (pos_column bn) `compare` debug0 "mergeRowIndent/colOld" (pos_column bo) of -- NOTE: new is on the left LT -> case (n,o) of -- NOTE: merge adjacent NodeText -- first -- second (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent && isIndented -> merge $ Tree t (os<>ns) where t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn boNew = bo{pos_column=pos_column bn} indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " " -- | Whether the horizontal delta is made of spaces isIndented = debug0 "mergeRowIndent/isIndented" $ case olds of [] -> True (unTree -> cell_end -> ep) : _ -> case pos_line ep `compare` pos_line bo of LT -> True EQ -> pos_column ep <= pos_column bn _ -> False _ -> collapse -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: preserve all NodeText "", but still split into two NodePara (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns) -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "mergeRowIndent/lvlNew" lvlNew > debug0 "mergeRowIndent/lvlOld" lvlOld -- # old -- ## new then concat -- ## old or # old -- # new # new else collapse -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) -- | rows'@(sec:olds') <- collapseSection (pos_column bn) rows | rows'@(sec:_) <- collapseRowsWhile isCollapsable rows , (unTree -> unCell -> NodeHeader HeaderSection{}) <- sec -> mergeRowIndent rows' row where isCollapsable = debug2 "mergeRowIndent/isCollapsable" "new" "old" $ \_t0@(unTree -> Cell b0 _e0 n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> pos_column bn == pos_column b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> concat -- _ -> replace -- NOTE: new is on the right GT -> case (n,o) of -- NOTE: only same line Root can be pushed on HeaderBar -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse -- NOTE: keep NodeText "" out of old NodePara (NodeText "", NodePara) -> collapse -- NOTE: merge adjacent NodeText (NodeText tn, NodeText to) -> case isAdjacent of _ | TL.null tn || TL.null to , not isVerbatim -> collapse -- old -- new True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns) -- old -- -- new False -> mergeRowIndent (collapseRoot old olds) (shifted:news) where shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns) bnNew = bn{pos_column=pos_column bo} indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " " -- _ -> concat where isAdjacent = pos_line bn - pos_line eo <= 1 -- | Whether a parent semantic want new to stay a NodeText isVerbatim = any p rows where p (unTree -> unCell -> NodeHeader HeaderBar{}) = True p _ = False concat = debug "mergeRowIndent/concat" $ List.reverse row <> rows merge m = debug "mergeRowIndent/merge" $ mergeRowIndent (m : olds) news collapse = debug "mergeRowIndent/collapse" $ mergeRowIndent (collapseRoot old olds) row replace = debug "mergeRowIndent/replace" $ mergeRowIndent (new : collapseRoot old olds) news -- | Like 'mergeRowIndent', but without maintaining the appending, -- hence collapsing all the 'Root's of the given 'Rows'. -- -- NOTE: 'initRows' MUST have been the first 'Rows' -- before calling 'mergeRowIndent' on it to get the given 'Rows'. collapseRows :: Rows -> Roots collapseRows rows = debug1_ "collapseRows" ("rows",rows) $ case collapseRowsWhile (\_new _old -> True) rows of [t] -> subTrees t _ -> undefined -- NOTE: subTrees returns the children of the updated initRows -- | Collapse downto any last HeaderSection, returning it and its level. collapseSection :: ColNum -> Rows -> Rows collapseSection col = debug1 "collapseSection" "rows" go where go rows@(new@(unTree -> Cell bn _en n):olds) | col <= pos_column bn = case n of NodeHeader HeaderSection{} -> rows _ -> collapseSection col $ collapseRoot new $ go olds go _ = mempty collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case [] -> mempty rows@(new@(Tree (Cell bn _en n) _ns):news) -> case news of [] -> rows old@(Tree (Cell bo eo o) _os):olds | not $ test new old -> rows | otherwise -> case debug0 "collapseRowsWhile/colNew" (pos_column bn) `compare` debug0 "collapseRowsWhile/colOld" (pos_column bo) of -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), NodeHeader (HeaderSection lvlOld)) -> if debug0 "collapseRowsWhile/lvlNew" lvlNew > debug0 "collapseRowsWhile/lvlOld" lvlOld -- # old -- ## new then collapse -- ## old or # old -- # new # new else debug "collapseRowsWhile/replace" $ collapseRowsWhile test $ (new:) $ collapseRoot old olds -- NOTE: old is no HeaderSection, then collapse to any older and loop (NodeHeader HeaderSection{}, _) | news'@(sec:_) <- debug0 "collapseRowsWhile/section" $ collapseRowsWhile isCollapsable news , (unTree -> unCell -> NodeHeader HeaderSection{}) <- sec -> collapseRowsWhile test news' where isCollapsable = debug2 "collapseRowsWhile/isCollapsable" "new" "old" $ \_t0@(unTree -> Cell b0 _e0 n0) _t1 -> case n0 of NodeHeader HeaderSection{} -> False _ -> pos_column bn == pos_column b0 -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> debug "collapseRowsWhile/section/parent" collapse -- NOTE: merge within old NodePara. (_, NodePara) | isAdjacent -> collapse -- _ -> collapse2 -- NOTE: new is either on the left or on the right _ -> collapse where isAdjacent = pos_line bn - pos_line eo <= 1 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new $ news collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old $ olds -- | Put a 'Root' as a child of the head 'Root'. -- -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced. -- NOTE: any NodeText/NodeText merging must have been done before. collapseRoot :: Root -> Rows -> Rows collapseRoot new@(Tree (Cell bn en n) _ns) rows = debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $ case rows of [] -> return new old@(Tree (Cell bo eo o) os) : olds -> case (n,o) of -- NOTE: no child into NodeText (_, NodeText{}) -> collapse2 -- NOTE: NodeText can begin a NodePara (NodeText tn, _) | not $ TL.null tn -> case o of -- NOTE: no NodePara within those NodeHeader HeaderEqual{} -> collapse NodeHeader HeaderBar{} -> collapse NodeHeader HeaderDashDash{} -> collapse -- NOTE: NodePara within those NodePara | not isAdjacent -> para NodeHeader{} -> para NodeGroup -> para _ -> collapse -- NOTE: amongst remaining nodes, only adjacent ones may enter an old NodePara. -- Note that since a NodePara is never adjacent to another, -- it is not nested within another. (_, NodePara) | isAdjacent -> case n of -- NOTE: no HeaderSection (even adjacent) within a NodePara NodeHeader HeaderSection{} -> collapse2 _ -> collapse | otherwise -> collapse2 _ -> collapse where isAdjacent = pos_line bn - pos_line eo <= 1 para = Tree (Cell bn en NodePara) (return new) : rows collapse = Tree (Cell bo en o) (os |> new) : olds collapse2 = collapseRoot new $ collapseRoot old olds