{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE ViewPatterns #-} module Language.TCT.Tree ( module Language.TCT.Tree , Tree(..), Trees ) where import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($)) import Data.Functor ((<$>)) import Data.Int (Int) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ordering(..), Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>), ViewR(..)) import Data.TreeSeq.Strict (Tree(..), Trees) import Prelude (undefined, Num(..)) import System.FilePath (FilePath) import Text.Show (Show(..)) import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import Language.TCT.Utils import Language.TCT.Cell import Language.TCT.Elem import Language.TCT.Debug -- * Type 'Root' -- | A single 'Tree' to rule all the 'Node's -- simplifies the navigation and transformations. -- -- For error reporting, each 'Node' is annotated with a 'Cell' -- spanning over all its content (sub-'Trees' included). type Root = Tree (Cell Node) type Roots = Trees (Cell Node) pattern Tree0 :: a -> Tree a pattern Tree0 a <- Tree a (null -> True) where Tree0 a = Tree a mempty -- * Type 'Node' data Node = NodeHeader !Header -- ^ node, from first parsing (indentation-sensitive) | NodeText !TL.Text -- ^ leaf verbatim text, from first parsing (indentation-sensitive) | NodePair !Pair -- ^ node, from second parsing (on some 'NodeText's) | NodeToken !Token -- ^ leaf, from second parsing (on some 'NodeText's) | NodeLower !Name !ElemAttrs -- ^ node, @ XML node | NodeGroup -- ^ node, group trees into a single tree, -- useful to return many trees when only one is expected deriving (Eq,Show) instance Pretty Node -- * Type 'Header' data Header = HeaderColon !Name !White -- ^ @name: @ | HeaderEqual !Name !White -- ^ @name=@ | HeaderBar !Name !White -- ^ @name|@ | HeaderGreat !Name !White -- ^ @name>@ | HeaderBrackets !Name -- ^ @[name]@ | HeaderDot !Name -- ^ @1. @ | HeaderDash -- ^ @- @ | HeaderDashDash -- ^ @-- @ | HeaderSection !LevelSection -- ^ @# @ | HeaderDotSlash !FilePath -- ^ @./file @ deriving (Eq, Ord, Show) instance Pretty Header -- ** Type 'Name' type Name = TL.Text -- ** Type 'LevelSection' type LevelSection = Int -- * Type 'Pair' data Pair = PairElem !ElemName !ElemAttrs -- ^ @value@ | PairHash -- ^ @#value#@ | PairStar -- ^ @*value*@ | PairSlash -- ^ @/value/@ | PairUnderscore -- ^ @_value_@ | PairDash -- ^ @-value-@ | PairBackquote -- ^ @`value`@ | PairSinglequote -- ^ @'value'@ | PairDoublequote -- ^ @"value"@ | PairFrenchquote -- ^ @«value»@ | PairParen -- ^ @(value)@ | PairBrace -- ^ @{value}@ | PairBracket -- ^ @[value]@ deriving (Eq,Ord,Show) instance Pretty Pair -- * Type 'Token' data Token = TokenText !TL.Text | TokenEscape !Char | TokenLink !Link | TokenTag !Tag deriving (Eq,Show) -- ** Type 'Tag' type Tag = TL.Text -- ** Type 'Link' type Link = TL.Text -- * Type 'Row' -- | In normal order: a list of 'Header's, maybe ended by 'Value', all read on the same line. type Row = [Root] -- ** Type 'Rows' -- | In reverse order: a list of nodes in scope -- (hence to which the next line can append to). type Rows = [Root] -- | Having an initial 'Root' simplifies 'appendRow': -- one can always put the last 'Root' as a child to a previous one. -- This 'Root' just has to be discarded by 'collapseRows'. initRows :: Rows initRows = [Tree0 (Cell p p NodeGroup)] where p = pos1{pos_line= -1, pos_column=0} -- NOTE: such that any following 'Root' -- is 'NodePara' if possible, and always a child. -- | @appendRow rows row@ appends @row@ to @rows@. -- -- [@rows@] parent 'Rows', from closest to farthest (non-strictly descending) -- [@row@] next 'Row', from leftest column to rightest (non-stricly ascending) appendRow :: Rows -> Row -> Rows appendRow rows row = debug2_ "appendRow" ("news",row) ("olds",rows) $ case (row,rows) of (_, []) -> undefined -- NOTE: cannot happen with initRows ([], _) -> rows (new@(Tree (Cell bn en n) ns):news, old@(Tree (Cell bo eo o) os):olds) -> case debug0 "appendRow/colNew" (pos_column bn) `compare` debug0 "appendRow/colOld" (pos_column bo) of -- NOTE: new is vertically lower LT -> case (n,o) of -- NOTE: merge adjacent NodeText -- first -- second (NodeText tn, NodeText to) | TL.null tn || TL.null to -> child | not isNewPara && isIndented -> merge $ Tree t (os<>ns) where t = NodeText <$> Cell boNew eo (indent<>to) <> Cell bn en tn boNew = bo{pos_column=pos_column bn} indent = TL.replicate (int64 $ pos_column bo - pos_column bn) " " -- | Whether the horizontal diff is made of spaces isIndented = debug0 "appendRow/isIndented" $ case olds of [] -> True (unTree -> cell_end -> ep) : _ -> case pos_line ep `compare` pos_line bo of LT -> True EQ -> pos_column ep <= pos_column bn _ -> False _ -> child -- NOTE: new is vertically aligned EQ -> case (n,o) of -- NOTE: preserve all NodeText "", but still split into two NodePara (NodeText tn, NodeText to) | TL.null tn || TL.null to -> child | not isNewPara -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns) -- NOTE: HeaderSection can parent Nodes at the same level (NodeHeader (HeaderSection lvlNew), _) | Just (lvlOld, rows'@(old':olds')) <- collapseSection (pos_column bn) rows -> if debug0 "appendRow/lvlNew" lvlNew > debug0 "appendRow/lvlOld" lvlOld then -- # old -- ## new {-concat-} List.reverse row <> rows' else -- ## old or # old -- # new # new {-child old'-} appendRow (appendChild old' olds') row -- NOTE: concat everything else following a HeaderSection. (_, NodeHeader HeaderSection{}) -> concat {- (NodeHeader ho@HeaderGreat{}, NodeHeader hn) | ho == hn -> debug "appendRow/HeaderGreat" $ appendRow rows news -} -- _ -> replace -- NOTE: new is vertically greater GT -> case (n,o) of -- NOTE: keep NodeText "" out of old NodePara (NodeText "", NodePara) -> child -- NOTE: merge adjacent NodeText (NodeText tn, NodeText to) -> case isNewPara of _ | TL.null tn || TL.null to -> child -- old -- -- new True -> appendRow (appendChild old olds) (shifted:news) where shifted = Tree (Cell bnNew en $ NodeText $ indent<>tn) (os<>ns) bnNew = bn{pos_column=pos_column bo} indent = TL.replicate (int64 $ pos_column bn - pos_column bo) " " -- old -- new False -> merge $ Tree (NodeText <$> Cell bo eo to <> Cell bn en tn) (os<>ns) -- _ -> concat where isNewPara = pos_line bn - pos_line eo > 1 concat = debug "appendRow/concat" $ List.reverse row <> rows merge m = debug "appendRow/merge" $ appendRow (m : olds) news child = debug "appendRow/child" $ appendRow (appendChild old olds) row replace = debug "appendRow/replace" $ appendRow (new : appendChild old olds) news -- | Collapse downto any last HeaderSection, returning it and its level. collapseSection :: ColNum -> Rows -> Maybe (LevelSection,Rows) collapseSection col = debug1 "collapseSection" "rows" go where go rows@(new@(unTree -> Cell bn _en n):olds) | col == pos_column bn = case n of NodeHeader (HeaderSection lvl) -> return (lvl, rows) _ -> (appendChild new <$>) <$> go olds go _ = Nothing -- | Like 'appendRow', but without maintaining the appending, -- hence collapsing all the 'Root's of the given 'Rows'. -- -- NOTE: 'initRows' MUST have been the first 'Rows' -- before calling 'appendRow' on it to get the given 'Rows'. collapseRows :: Rows -> Roots collapseRows = debug1 "collapseRows" "rows" $ \case [] -> mempty new@(Tree (Cell bn _en n) _ns):olds -> case olds of [] -> subTrees new old@(Tree (Cell bo eo o) _os):oldss -> case debug0 "colNew" (pos_column bn) `compare` debug0 "colOld" (pos_column bo) of -- NOTE: new is vertically aligned EQ -> case (n,o) of (NodeHeader (HeaderSection lvlNew), _) | Just (lvlOld, old':olds') <- collapseSection (pos_column bn) olds -> if debug0 "collapseRows/lvlNew" lvlNew > debug0 "collapseRows/lvlOld" lvlOld then -- # old -- ## new {-child new-} collapseRows $ appendChild new $ old':olds' else -- ## old or # old -- # new # new {-child old'-} collapseRows $ new:appendChild old' olds' -- NOTE: in case of alignment, HeaderSection is parent. (_, NodeHeader HeaderSection{}) -> child -- NOTE: merge within old NodePara. (_, NodePara{}) | not isNewPara -> child -- _ -> child2 -- NOTE: new is either vertically lower or greater _ -> child where isNewPara = pos_line bn - pos_line eo > 1 child, child2 :: Roots child = debug "collapseRows/child" $ collapseRows $ appendChild new olds child2 = debug "collapseRows/child2" $ collapseRows $ appendChild new $ appendChild old oldss -- | Put a 'Root' as a child of the head 'Root'. -- -- NOTE: 'appendChild' is where 'NodePara' may be introduced. -- NOTE: any NodeText/NodeText merging must have been done before. appendChild :: Root -> Rows -> Rows appendChild new@(Tree (Cell bn en n) _ns) rows = debug2_ "appendChild" ("new",Seq.singleton new) ("rows",rows) $ case rows of [] -> return new old@(Tree (Cell bo eo o) os) : olds -> case (n,o) of -- NOTE: never put a NodePara directly within another (NodePara, NodePara) -> child2 -- NOTE: never put a child to NodeText (_, NodeText{}) -> child2 -- NOTE: NodeText can begin a NodePara (NodeText tn, _) | not $ TL.null tn -> case o of -- NOTE: no NodePara within those NodeHeader HeaderEqual{} -> child NodeHeader HeaderBar{} -> child NodeHeader HeaderDashDash{} -> child -- NOTE: NodePara within those NodePara | isNewPara -> para NodeHeader{} -> para NodeGroup -> para _ -> child _ -> child where isNewPara = pos_line bn - pos_line eo > 1 child = Tree (Cell bo en o) (os |> new) : olds child2 = appendChild new $ appendChild old olds para = Tree (Cell bn en NodePara) (return new) : rows -- | Return a 'Tree' from a 'Cell' node and 'subTrees', -- while adjusting the node's 'cell_end' -- with the last 'Tree' of the 'subTrees'. tree :: Cell a -> Trees (Cell a) -> Tree (Cell a) tree (Cell bp ep a) ts = Tree (Cell bp ep' a) ts where ep' = case Seq.viewr ts of EmptyR -> ep _ :> (unTree -> cell_end -> p) -> p