{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.TCT.Write.XML where import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$), ($>)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|)) import Data.Set (Set) import Data.TreeSeq.Strict (Tree(..), tree0) import Data.Tuple (uncurry) import Prelude (Num(..), undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Language.TCT.Write.Plain as Plain -- import Language.TCT.Debug import Language.TCT.Utils import Language.TCT hiding (Parser) import Language.XML import Text.Blaze.XML () -- | Main entry point -- -- NOTE: 'XmlNode' are still annotated with 'Cell', -- but nothing is done to preserve any ordering amongst them, -- because 'Node's sometimes need to be reordered -- (eg. about/title may have a title from the section before, -- hence outside of about). -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting. writeXML :: Roots -> XMLs writeXML doc = -- (`S.evalState` def) $ case Seq.viewl doc of sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot -> let (titles, content) = partitionSection sec in case Seq.viewl titles of (unTree -> Cell st _) :< _ -> xmlify def { inh_titles = titles , inh_figure = True } contentWithAbout <> xmlify def foot where contentWithAbout = case Seq.findIndexL isAbout content of Nothing -> Tree (Cell st $ NodeHeader $ HeaderColon "about" "") mempty <| content Just{} -> content isAbout = \case (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True _ -> False _ -> xmlify def doc _ -> xmlify def doc partitionSection :: Root -> (Roots, Roots) partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) = case Seq.viewl body of EmptyL -> mempty title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest -> let (subtitles, content) = spanlSubtitles et rest in (title <| (subtitles >>= subTrees), content) where spanlSubtitles ep ts = case Seq.viewl ts of sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs | lvlSub <= lvlPar , pos_line span_begin - pos_line ep <= 1 -> let (subs, ts') = spanlSubtitles span_end rs in (sub <| subs, ts') _ -> (mempty, ts) _ -> (mempty, body) partitionSection _ = mempty -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_para :: [Inh -> Root -> XML] , inh_titles :: Roots } instance Default Inh where def = Inh { inh_figure = False , inh_para = List.repeat elementPara , inh_titles = mempty } -- ** 'inh_para' elementPara :: Inh -> Root -> XML elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts elementTitle :: Inh -> Root -> XML elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts elementName :: Inh -> Root -> XML elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts attributeName :: Inh -> Root -> XML attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.writePlain ts) <$ c) -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh roots = case Seq.viewl roots of EmptyL -> mempty r@(Tree cr@(Cell _sr nr) ts) :< rs -> case nr of ---------------------- -- NOTE: HeaderColon becomes parent -- of any continuous following-sibling HeaderBar or HeaderGreat NodeHeader (HeaderColon n _wh) | (span, rest) <- spanlHeaderColon rs , not $ null span -> xmlify inh (Tree cr (ts<>span)) <> xmlify inh rest where spanlHeaderColon :: Roots -> (Roots, Roots) spanlHeaderColon = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n _ -> False ---------------------- -- NOTE: gather HeaderBrackets NodeHeader HeaderBrackets{} | (span,rest) <- spanlBrackets roots , not (null span) -> (<| xmlify inh rest) $ element "references" $ span >>= xmlify inh where spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True _ -> False ---------------------- -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case. NodeText x | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs -> xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs' ---------------------- -- NOTE: detect [some text](http://some.url) or [SomeRef] NodePair PairParen | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs -> (<| xmlify inh rs') $ case bracket of (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) -> element "eref" $ xmlAttrs [Cell sl ("to",lnk)] <> xmlify inh ts _ -> element "rref" $ xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <> xmlify inh ts ---------------------- -- NOTE: gather HeaderDash _ | (span, rest) <- spanlItems (==HeaderDash) roots , not $ null span -> (<| xmlify inh rest) $ element "ul" $ span >>= xmlify inh{inh_para=List.repeat elementPara} ---------------------- -- NOTE: gather HeaderDot | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots , not $ null span -> (<| xmlify inh rest) $ element "ol" $ span >>= xmlify inh{inh_para=List.repeat elementPara} where spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader = Seq.spanl $ \(unTree -> (unCell -> nod)) -> case nod of NodeHeader (HeaderColon "li" _wh) -> True NodeHeader hdr -> liHeader hdr NodePair (PairElem "li" _as) -> True _ -> False ---------------------- NodePara | para:inh_para <- inh_para inh -> para inh r <| -- para (() <$ cr) (xmlify inh ts) <| xmlify inh{inh_para} rs ---------------------- -- NOTE: context-free Root _ -> xmlify inh r <> xmlify inh rs where element :: XmlName -> XMLs -> XML element n = Tree (XmlElem n <$ cr) instance Xmlify Root where xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) = case nod of ---------------------- NodePara -> case inh_para inh of [] -> xmlify inh ts para:_ -> Seq.singleton $ para inh tn -- para (() <$ cn) $ xmlify inh ts ---------------------- NodeHeader hdr -> case hdr of -- HeaderSection{} -> Seq.singleton $ element "section" $ head <> xmlify inh' body where (titles, content) = partitionSection tn (attrs, body) = partitionAttrs content head = case Seq.viewl titles of EmptyL -> mempty title@(unTree -> ct) :< subtitles -> xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <> xmlify inh{inh_para=List.repeat elementTitle} title <> aliases where aliases = subtitles >>= \subtitle@(unTree -> cs) -> return $ Tree (cs $> XmlElem "alias") $ xmlAttrs [cs $> ("id",getAttrId subtitle)] inh' = inh { inh_para = List.repeat elementPara , inh_figure = True } -- HeaderColon n _wh -> let (attrs,body) = partitionAttrs ts in case n of -- NOTE: insert titles into . "about" -> Seq.singleton $ element "about" $ xmlify inh' (inh_titles inh) <> xmlAttrs attrs <> xmlify inh'{inh_figure=False} body -- NOTE: in
mode, unreserved elements become
_ | inh_figure inh && n`List.notElem`elems || TL.null n -> Seq.singleton $ element "figure" $ -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <> xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <> case toList body of [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body -- NOTE: reserved elements _ -> Seq.singleton $ element (xmlLocalName n) $ xmlAttrs attrs <> xmlify inh' body where inh' = inh { inh_para = case n of "about" -> List.repeat elementTitle "reference" -> elementTitle : List.repeat elementPara "serie" -> List.repeat attributeName "author" -> List.repeat attributeName "editor" -> List.repeat attributeName "org" -> List.repeat attributeName "note" -> List.repeat elementPara _ -> [] } -- HeaderBar n wh -> if inh_figure inh && n`List.notElem`elems || TL.null n then Seq.singleton $ element "artwork" $ xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <> xmlify inh{inh_para=[]} ts else xmlify inh $ Tree (cell $ NodeHeader $ HeaderColon n wh) ts -- HeaderGreat n _wh -> Seq.singleton $ let (attrs,body) = partitionAttrs ts in element "quote" $ xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <> xmlify inh{inh_para=List.repeat elementPara} body -- HeaderEqual n _wh -> Seq.singleton $ Tree0 $ cell $ XmlAttr (xmlLocalName n) $ Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts -- HeaderDot n -> Seq.singleton $ element "li" $ let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <> xmlify inh ts -- HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $ Plain.writePlain ts -- HeaderBrackets ident -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element "reference" $ xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) <> xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body where inh' = inh{inh_figure = False} -- HeaderDotSlash _file -> xmlify inh ts ---------------------- NodePair pair -> case pair of PairBracket | to <- Plain.writePlain ts , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to -> Seq.singleton $ element "rref" $ xmlAttrs [cell ("to",to)] PairStar -> Seq.singleton $ element "b" $ xmlify inh ts PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts PairFrenchquote -> Seq.singleton $ element "q" $ xmlify inh ts {- case ts of (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) -> case Seq.viewr ls of m :> Tree0 (Cell br er (TokenPlain r)) -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))) <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))) _ -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) -> xmlify inh $ rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r))) _ -> xmlify inh ts -} PairHash -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",Plain.writePlain ts)] PairElem name attrs -> Seq.singleton $ element (xmlLocalName name) $ xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) -> cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XmlText open) `unionXml` xmlify inh ts `unionXml` Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XmlText close) where (open, close) = pairBorders pair ts bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)} en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)} ---------------------- NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t ---------------------- NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)] TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)] ---------------------- NodeLower n as -> Seq.singleton $ element "artwork" $ xmlify inh ts where cell :: a -> Cell a cell = Cell ss element :: XmlName -> XMLs -> XML element n = Tree (cell $ XmlElem n) instance Xmlify (Seq (Cell (XmlName,TL.Text))) where xmlify _inh = xmlAttrs -- * Elements -- | Reserved elements' name elems :: Set TL.Text elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "audio" , "author" , "authors" , "bcp14" , "br" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "region" , "rref" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tbody" , "td" , "tel" , "tfoot" , "title" , "th" , "thead" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ] -- * Attributes -- | Convenient alias, forcing the types xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>) -- | Extract attributes partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots) partitionAttrs ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n _ -> False attrs = attr <$> as attr = \case Tree (Cell ssn (NodeHeader (HeaderEqual n _wh))) a -> Cell ssn (xmlLocalName n, v) where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a _ -> undefined getAttrId :: Root -> TL.Text getAttrId = Plain.writePlain . Seq.singleton setAttr :: Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) -> Seq (Cell (XmlName, TL.Text)) setAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defaultAttr :: Seq (Cell (XmlName, TL.Text)) -> Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) defaultAttr as a@(unCell -> (k, _v)) = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as -- * Text -- | Unify two 'XMLs', merging border 'XmlText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XmlText tx)) , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XmlText ty)) ) | fx == fy -> xs `unionXml` Seq.singleton (Tree0 $ (XmlText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x unionsXml :: Foldable f => f XMLs -> XMLs unionsXml = foldl' unionXml mempty