{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Language.TCT.Tree ( module Language.TCT.Tree , Tree(..), Trees ) where import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..), any) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.Monoid (Monoid(..)) import Data.Ord (Ordering(..), Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>), ViewR(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (undefined, Num(..)) import System.FilePath (FilePath) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import Language.TCT.Utils import Language.TCT.Cell import Language.TCT.Elem import Language.TCT.Debug -- * Type 'Root' -- | A single 'Tree' to rule all the 'Node's -- simplifies the navigation and transformations. -- -- For error reporting, each 'Node' is annotated with a 'Cell' -- spanning over all its content (sub-'Trees' included). type Root = Tree (Cell Node) type Roots = Trees (Cell Node) pattern Tree0 :: a -> Tree a pattern Tree0 a <- Tree a (null -> True) where Tree0 a = Tree a mempty -- * Type 'Node' data Node = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive) | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive) | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's) | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's) | NodeLower !Name !ElemAttrs -- ^ node, @ XML node | NodeGroup -- ^ node, group trees into a single tree, -- useful to return many trees when only one is expected deriving (Eq,Show) instance Pretty Node -- * Type 'Header' data Header = HeaderColon !Name !White -- ^ @name: @ | HeaderEqual !Name !White -- ^ @name=@ | HeaderBar !Name !White -- ^ @name|@ | HeaderGreat !Name !White -- ^ @name>@ | HeaderBrackets !Name -- ^ @[name]@ | HeaderDot !Name -- ^ @1. @ | HeaderDash -- ^ @- @ | HeaderDashDash -- ^ @-- @ | HeaderSection !LevelSection -- ^ @# @ | HeaderDotSlash !FilePath -- ^ @./file @ deriving (Eq, Ord, Show) instance Pretty Header -- ** Type 'Name' type Name = TL.Text -- ** Type 'LevelSection' type LevelSection = Int -- * Type 'Pair' data Pair = PairElem !ElemName !ElemAttrs -- ^ @value@ | PairHash -- ^ @#value#@ | PairStar -- ^ @*value*@ | PairSlash -- ^ @/value/@ | PairUnderscore -- ^ @_value_@ | PairDash -- ^ @-value-@ | PairBackquote -- ^ @`value`@ | PairSinglequote -- ^ @'value'@ | PairDoublequote -- ^ @"value"@ | PairFrenchquote -- ^ @«value»@ | PairParen -- ^ @(value)@ | PairBrace -- ^ @{value}@ | PairBracket -- ^ @[value]@ deriving (Eq,Ord,Show) instance Pretty Pair -- * Type 'Token' data Token = TokenText !TL.Text | TokenEscape !Char | TokenLink !Link | TokenTag !Tag deriving (Eq,Show) -- ** Type 'Tag' type Tag = TL.Text -- ** Type 'Link' type Link = TL.Text -- * Type 'Row' -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line. type Row = [Root] -- ** Type 'Rows' -- | In reverse order: a list of nodes in scope -- (hence to which the next line can append to). type Rows = [Root] -- | Having an initial 'Root' simplifies 'appendRow': -- one can always put the last 'Root' as a child to a previous one. -- This 'Root' just has to be discarded by 'collapseRows'. initRows :: Rows initRows = [Tree0 (Cell p p NodeGroup)] where p = pos1{pos_line= -1, pos_column=0} -- NOTE: such that any following 'Root' -- is 'NodePara' if possible, and always a child. -- | @mergeRow rows row@ append @row@ into @rows@, while merging what has to be. -- -- * [@rows@] is old 'Rows', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MAY span over multilines, and they can be many from a single line. -- * [@row@] is new 'Row', its |Root|s' 'cell_begin' are descending (non-strictly), -- they MUST span only over a single and entire line. -- -- This is the main entry point to build 'Rows' by accumulating 'Row' into them. mergeRow :: Rows -> Row -> Rows mergeRow rows row = debug2_ "mergeRow" ("news",row) ("olds",rows) $ zipRow 0 rows $ List.reverse row -- | 'HeaderGreat' and 'HeaderBar' work, not on indentation, -- but on their vertical alignment as prefixes. -- Hence, each new 'Row' has those prefixes zipped into a single one -- when they match and are aligned. zipRow :: ColNum -> Rows -> Row -> Rows zipRow col rows row = debug3_ "zipRow" ("col",col) ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( _new@(Tree (Cell bn _en n) _ns):news , _old@(Tree (Cell _bo eo _o) _os):_olds ) -> case collapseRowsWhile isCollapsable rows of [] -> appendRow rows row head@(unTree -> Cell bh _eh h) : olds' -> case (n,h) of -- NOTE: zipping: when new is HeaderGreat, collapse last line downto col -- then check if there is a matching HeaderGreat, -- if so, discard new and restart with a col advanced to new's beginning (NodeHeader hn@HeaderGreat{}, NodeHeader hh@HeaderGreat{}) | pos_column bn == pos_column bh , isAdjacent , hn == hh -> discard -- NOTE: same for HeaderBar (NodeHeader hn@HeaderBar{}, NodeHeader hh@HeaderBar{}) | pos_column bn == pos_column bh , isAdjacent , hn == hh -> discard -- NOTE: collapsing: any other new aligned or on the right of an adjacent head -- makes it collapse entirely (_, NodeHeader HeaderGreat{}) | col < pos_column bh -> collapse -- NOTE: same for HeaderBar (_, NodeHeader HeaderBar{}) | col < pos_column bh -> collapse _ -> debug "zipRow/append" $ appendRow rows row where discard = debug "zipRow/discard" $ zipRow (pos_column bh) rows news collapse = debug "zipRow/collapse" $ zipRow col (collapseRoot head olds') row isAdjacent = pos_line bn - pos_line eo <= 1 where isCollapsable = -- debug2 "zipRow/isCollapsable" "new" "old" $ \_new@(unTree -> Cell bn _en _n) _old@(unTree -> Cell bo eo _o) -> (pos_line bn - pos_line eo <= 1) && -- adjacent col < pos_column bo -- righter than col appendRow :: Rows -> Row -> Rows appendRow rows row = debug2_ "appendRow" ("news",row) ("olds",rows) $ case (row,rows) of ([], _) -> rows (_, []) -> undefined -- NOTE: cannot happen with initRows ( new@(Tree (Cell bn en n) ns):news ,old@(Tree (Cell bo eo o) os):olds ) -> case debug0 "appendRow/colNew" (pos_column bn) `compare` debug0 "appendRow/colOld" (pos_column bo) of -- NOTE: new is on the left LT -> case (n,o) of -- NOTE: merge adjacent NodeText -- first -- second (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent && isIndented -> merge $ Tree t (os<>ns) where t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn boNew = bo{pos_column=pos_column bn} indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " " -- | Whether the horizontal delta is made of spaces isIndented = debug0 "appendRow/isIndented" $ case olds of [] -> True (unTree -> cell_end -> ep) : _ -> case pos_line ep `compare` pos_line bo of LT -> True EQ -> pos_column ep <= pos_column bn _ -> False _ -> collapse -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: preserve all NodeText "", but still split into two NodePara (NodeText tn, NodeText to) | TL.null tn || TL.null to , not isVerbatim -> collapse | isAdjacent -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns) -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), _) | rows'@(old':olds') <- collapseRowsWhile isCollapsable rows , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- old' -> if debug0 "appendRow/lvlNew" lvlNew > debug0 "appendRow/lvlOld" lvlOld then -- # old' -- ## new {-concat using old'-} List.reverse row <> rows' else -- ## old' or # old' -- # new # new {-collapse using old'-} appendRow (collapseRoot old' olds') row where isCollapsable = -- debug2 "appendRow/isCollapsable" "new" "old" $ \_new _old@(unTree -> Cell bt _et t) -> case t of NodeHeader HeaderSection{} -> False _ -> pos_column bt == pos_column bn -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> concat -- _ -> replace -- NOTE: new is on the right GT -> case (n,o) of -- NOTE: only same line Root can be pushed on HeaderBar -- DELME: (_, NodeHeader HeaderBar{}) | pos_column bn /= pos_column eo -> collapse -- NOTE: keep NodeText "" out of old NodePara (NodeText "", NodePara) -> collapse -- NOTE: merge adjacent NodeText (NodeText tn, NodeText to) -> case isAdjacent of _ | TL.null tn || TL.null to , not isVerbatim -> collapse -- old -- new True -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns) -- old -- -- new False -> appendRow (collapseRoot old olds) (shifted:news) where shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns) bnNew = bn{pos_column=pos_column bo} indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " " -- _ -> concat where isAdjacent = pos_line bn - pos_line eo <= 1 -- | Whether a parent semantic want new to stay a NodeText isVerbatim = any p rows where p (unTree -> unCell -> NodeHeader HeaderBar{}) = True p _ = False concat = debug "appendRow/concat" $ List.reverse row <> rows merge m = debug "appendRow/merge" $ appendRow (m : olds) news collapse = debug "appendRow/collapse" $ appendRow (collapseRoot old olds) row replace = debug "appendRow/replace" $ appendRow (new : collapseRoot old olds) news -- | Like 'appendRow', but without maintaining the appending, -- hence collapsing all the 'Root's of the given 'Rows'. -- -- NOTE: 'initRows' MUST have been the first 'Rows' -- before calling 'appendRow' on it to get the given 'Rows'. collapseRows :: Rows -> Roots collapseRows rows = case collapseRowsWhile (\_new _old -> True) rows of [t] -> subTrees t _ -> undefined -- NOTE: subTrees returns the children of the updated initRows collapseRowsWhile :: (Root -> Root -> Bool) -> Rows -> Rows collapseRowsWhile test = debug1 "collapseRowsWhile" "rows" $ \case [] -> mempty rows@(new@(Tree (Cell bn _en n) _ns):olds) -> case olds of [] -> rows -- old@(Tree (Cell bo eo o) _os):oldss | not $ test new old -> rows | otherwise -> case debug0 "colNew" (pos_column bn) `compare` debug0 "colOld" (pos_column bo) of -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), _) | old':olds' <- collapseRowsWhile isCollapsable olds , (unTree -> unCell -> NodeHeader (HeaderSection lvlOld)) <- old' -> if debug0 "collapseRowsWhile/lvlNew" lvlNew > debug0 "collapseRowsWhile/lvlOld" lvlOld then -- # old' -- ## new collapseRowsWhile test $ collapseRoot new $ old':olds' else -- ## old' or # old' -- # new # new collapseRowsWhile test $ new:collapseRoot old' olds' where isCollapsable = \_new _old@(unTree -> Cell bt _et t) -> case t of NodeHeader HeaderSection{} -> False _ -> pos_column bt == pos_column bn -- NOTE: in case of alignment, HeaderSection is parent (_, NodeHeader HeaderSection{}) -> collapse -- NOTE: merge within old NodePara. (_, NodePara{}) | isAdjacent -> collapse -- _ -> collapse2 -- NOTE: new is either on the left or on the right _ -> collapse where isAdjacent = pos_line bn - pos_line eo <= 1 collapse = debug "collapseRowsWhile/collapse" $ collapseRowsWhile test $ collapseRoot new olds collapse2 = debug "collapseRowsWhile/collapse2" $ collapseRowsWhile test $ collapseRoot new $ collapseRoot old oldss -- | Put a 'Root' as a child of the head 'Root'. -- -- NOTE: 'collapseRoot' is where 'NodePara' may be introduced. -- NOTE: any NodeText/NodeText merging must have been done before. collapseRoot :: Root -> Rows -> Rows collapseRoot new@(Tree (Cell bn en n) _ns) rows = debug2_ "collapseRoot" ("new",Seq.singleton new) ("rows",rows) $ case rows of [] -> return new old@(Tree (Cell bo eo o) os) : olds -> case (n,o) of -- NOTE: never put a NodePara directly within another (NodePara, NodePara) -> collapse2 -- NOTE: never put a collapse to NodeText, except some NodeHeader to preserve them (_, NodeText{}) -> collapse2 -- NOTE: NodeText can begin a NodePara (NodeText tn, _) | not $ TL.null tn -> case o of -- NOTE: no NodePara within those NodeHeader HeaderEqual{} -> collapse NodeHeader HeaderBar{} -> collapse NodeHeader HeaderDashDash{} -> collapse -- NOTE: NodePara within those NodePara | not isAdjacent -> para NodeHeader{} -> para NodeGroup -> para _ -> collapse _ -> collapse where isAdjacent = pos_line bn - pos_line eo <= 1 para = Tree (Cell bn en NodePara) (return new) : rows collapse = Tree (Cell bo en o) (os |> new) : olds collapse2 = collapseRoot new $ collapseRoot old olds -- | Return a 'Tree' from a 'Cell' node and 'subTrees', -- while adjusting the node's 'cell_end' -- with the last 'Tree' of the 'subTrees'. tree :: Cell a -> Trees (Cell a) -> Tree (Cell a) tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts where ep' = case Seq.viewr ts of EmptyR -> ep _ :> (unTree -> cell_end -> p) -> p