{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.TCT.Write.XML where import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|)) import Data.Set (Set) import Data.TreeSeq.Strict (Tree(..)) import Data.Tuple (uncurry) import Prelude (undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Language.TCT.Write.Plain as Plain import qualified System.FilePath as FP import Text.Blaze.XML () import Language.TCT hiding (Parser) -- import Language.TCT.Debug import Language.XML -- | Main entry point -- -- NOTE: 'XmlNode' are still annotated with 'Cell', -- but nothing is done to preserve any ordering amongst them, -- because 'Node's sometimes need to be reordered -- (eg. about/title may have a title from the section before, -- hence outside of about). -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting. xmlDocument :: Roots -> XMLs xmlDocument doc = -- (`S.evalState` def) $ case Seq.viewl doc of Tree (unCell -> NodeHeader HeaderSection{}) body :< foot -> case Seq.viewl body of title@(unTree -> Cell bt et NodePara{}) :< content -> xmlify def { inh_titles = return title , inh_figure = True } contentWithAbout <> xmlify def foot where contentWithAbout = case Seq.findIndexL isAbout content of Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content Just{} -> content isAbout = \case (unTree -> unCell -> NodeHeader (HeaderColon "about" _wh)) -> True _ -> False _ -> xmlify def doc _ -> xmlify def doc {- -- * Type 'Xmls' type Xmls = S.State State XMLs type Xml = S.State State XML instance Semigroup Xmls where (<>) = liftA2 (<>) instance Monoid Xmls where mempty = return mempty mappend = (<>) -- * Type 'State' data State = State { state_pos :: Pos } instance Default State where def = State { state_pos = pos1 } -} -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_para :: [Cell () -> XMLs -> XML] , inh_titles :: Roots } instance Default Inh where def = Inh { inh_figure = False , inh_para = List.repeat xmlPara , inh_titles = mempty } -- ** 'inh_para' xmlPara :: Cell a -> XMLs -> XML xmlPara c = Tree (XmlElem "para" <$ c) xmlTitle :: Cell a -> XMLs -> XML xmlTitle c = Tree (XmlElem "title" <$ c) xmlName :: Cell a -> XMLs -> XML xmlName c = Tree (XmlElem "name" <$ c) -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh roots = case Seq.viewl roots of EmptyL -> mempty r@(Tree cr@(Cell _br _er nr) ts) :< rs -> case nr of ---------------------- -- NOTE: HeaderColon becomes parent -- of any continuous following-sibling HeaderBar or HeaderGreat NodeHeader (HeaderColon n _wh) | (span, rest) <- spanlHeaderColon rs , not $ null span -> xmlify inh $ Tree cr (ts<>span) <| rest where spanlHeaderColon :: Roots -> (Roots, Roots) spanlHeaderColon = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n _ -> False ---------------------- -- NOTE: gather HeaderBrackets NodeHeader HeaderBrackets{} | (span,rest) <- spanlBrackets roots , not (null span) -> (<| xmlify inh rest) $ element "references" $ span >>= xmlify inh where spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True _ -> False ---------------------- -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case. NodeText x | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs -> xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs' ---------------------- -- NOTE: detect [some text](http://some.url) or [SomeRef] NodePair PairParen | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs -> (<| xmlify inh rs') $ case bracket of (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) -> element "eref" $ xmlAttrs [Cell bl el ("to",lnk)] <> xmlify inh ts _ -> element "rref" $ xmlAttrs [Cell bb eb ("to",Plain.plainDocument bracket)] <> xmlify inh ts ---------------------- -- NOTE: gather HeaderDash _ | (span, rest) <- spanlItems (==HeaderDash) roots , not $ null span -> (<| xmlify inh rest) $ element "ul" $ span >>= xmlify inh{inh_para=List.repeat xmlPara} ---------------------- -- NOTE: gather HeaderDot | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots , not $ null span -> (<| xmlify inh rest) $ element "ol" $ span >>= xmlify inh{inh_para=List.repeat xmlPara} where spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader = Seq.spanl $ \(unTree -> unCell -> nod) -> case nod of NodeHeader (HeaderColon "li" _wh) -> True NodeHeader hdr -> liHeader hdr NodePair (PairElem "li" _as) -> True _ -> False ---------------------- -- NOTE: context-free Root _ -> xmlify inh r <> xmlify inh rs where element :: XmlName -> XMLs -> XML element n = Tree (XmlElem n <$ cr) {- t@(Tree (NodePair (PairElem))) :< ts -> case inh_para inh of [] -> xmlify inh t <> go inh ts _ | isTokenElem toks -> xmlify inh t <> go inh ts tree0:inh_para -> (case Seq.viewl toks of EmptyL -> id (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $ go inh{inh_para} ts -} instance Xmlify Root where xmlify inh (Tree cel@(Cell bp ep nod) ts) = case nod of NodeGroup -> xmlify inh ts ---------------------- NodePara -> case inh_para inh of [] -> xmlify inh ts para:inh_para -> Seq.singleton $ para (() <$ cel) $ xmlify inh{inh_para} ts ---------------------- NodeHeader hdr -> case hdr of -- HeaderSection{} -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element "section" $ xmlAttrs (attrs `defaultAttr` Cell ep ep ("id",getAttrId body)) <> xmlify inh' body where inh' = inh { inh_para = xmlTitle : List.repeat xmlPara , inh_figure = True } -- HeaderColon n _wh -> let (attrs,body) = partitionAttrs ts in case n of -- NOTE: insert titles into . "about" -> Seq.singleton $ element "about" $ (inh_titles inh >>= xmlify inh') <> xmlAttrs attrs <> xmlify inh body -- NOTE: in
mode, unreserved nodes become
_ | inh_figure inh && not (n`List.elem`elems) -> Seq.singleton $ element "figure" $ xmlAttrs (setAttr (Cell ep ep ("type",n)) attrs) <> case toList body of [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body -- NOTE: reserved nodes _ -> Seq.singleton $ element (xmlLocalName n) $ xmlAttrs attrs <> xmlify inh' ts where inh' = inh { inh_para = case n of "about" -> xmlTitle : List.repeat xmlPara "reference" -> xmlTitle : List.repeat xmlPara "serie" -> List.repeat xmlName "author" -> List.repeat xmlName "editor" -> List.repeat xmlName "org" -> List.repeat xmlName _ -> [] } ---------------------- HeaderBar n _wh -> Seq.singleton $ element "artwork" $ xmlAttrs (Seq.singleton $ Cell bp bp ("type", if TL.null n then "txt" else n)) <> xmlify inh{inh_para=[]} ts ---------------------- HeaderGreat n _wh -> Seq.singleton $ let (attrs,body) = partitionAttrs ts in element "artwork" $ xmlAttrs (attrs `defaultAttr` Cell bp bp ("type", if TL.null n then "quote" else n)) <> xmlify inh{inh_para=[]} body -- HeaderEqual n _wh -> Seq.singleton $ element (xmlLocalName n) $ xmlify inh ts -- HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $ -- debug1_ ("TS", ts) $ -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $ Plain.plainDocument ts -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing {- TreeSeq.mapAlsoNode (cell1 . unCell) (\_k -> fmap $ TreeSeq.mapAlsoNode (cell1 . unCell) (\_k' -> cell1 . unCell)) <$> ts -} -- HeaderBrackets ident -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element "reference" $ xmlAttrs (setAttr (Cell ep ep ("id",ident)) attrs) <> xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body where inh' = inh{inh_figure = False} -- HeaderDotSlash p -> Seq.singleton $ element "include" $ xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <> xmlify inh ts ---------------------- NodePair pair -> case pair of PairBracket | to <- Plain.plainDocument ts , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to -> Seq.singleton $ element "rref" $ xmlAttrs [cell ("to",to)] PairStar -> Seq.singleton $ element "b" $ xmlify inh ts PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts PairFrenchquote -> Seq.singleton $ element "q" $ xmlify inh ts {- case ts of (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) -> case Seq.viewr ls of m :> Tree0 (Cell br er (TokenPlain r)) -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))) <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))) _ -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) -> xmlify inh $ rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r))) _ -> xmlify inh ts -} PairHash -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",Plain.plainDocument ts)] PairElem name attrs -> Seq.singleton $ element (xmlLocalName name) $ xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) -> cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> let (open, close) = pairBorders pair ts in Seq.singleton (Tree0 $ Cell bp bp $ XmlText open) `unionXml` xmlify inh ts `unionXml` Seq.singleton (Tree0 $ Cell ep ep $ XmlText close) ---------------------- NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t ---------------------- NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)] TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)] ---------------------- NodeLower n as -> Seq.singleton $ element "artwork" $ xmlify inh ts where cell :: a -> Cell a cell = Cell bp ep element :: XmlName -> XMLs -> XML element n = Tree (cell $ XmlElem n) instance Xmlify (Seq (Cell (XmlName,TL.Text))) where xmlify _inh = xmlAttrs -- * Elements -- | Reserved elements' name elems :: Set TL.Text elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "audio" , "author" , "authors" , "bcp14" , "br" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "region" , "rref" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tbody" , "td" , "tel" , "tfoot" , "title" , "th" , "thead" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ] -- * Attributes -- | Convenient alias, forcing the types xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>) -- | Extract attributes partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots) partitionAttrs ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unCell -> NodeHeader (HeaderEqual (TL.null -> False) _wh)) _cs -> True _ -> False attrs = attr <$> as attr = \case Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a -> Cell bp ep (xmlLocalName n, v) where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a _ -> undefined getAttrId :: Roots -> TL.Text getAttrId ts = case Seq.viewl ts of EmptyL -> "" t :< _ -> Plain.plainDocument $ Seq.singleton t setAttr :: Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) -> Seq (Cell (XmlName, TL.Text)) setAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defaultAttr :: Seq (Cell (XmlName, TL.Text)) -> Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) defaultAttr as a@(unCell -> (k, _v)) = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as -- * Text -- | Unify two 'XMLs', merging border 'XmlText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of ( Tree0 (Cell bx ex (XmlText tx)) , Tree0 (Cell by ey (XmlText ty)) ) -> xs `unionXml` Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x unionsXml :: Foldable f => f XMLs -> XMLs unionsXml = foldl' unionXml mempty