{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.TCT.Write.XML where import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$), ($>)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|)) import Data.Set (Set) import Data.TreeSeq.Strict (Tree(..), tree0) import Data.Tuple (uncurry) import Prelude (Num(..), undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Language.TCT.Write.Plain as Plain import qualified System.FilePath as FP -- import Language.TCT.Debug import Language.TCT.Utils import Language.TCT hiding (Parser) import Language.XML import Text.Blaze.XML () -- | Main entry point -- -- NOTE: 'XmlNode' are still annotated with 'Cell', -- but nothing is done to preserve any ordering amongst them, -- because 'Node's sometimes need to be reordered -- (eg. about/title may have a title from the section before, -- hence outside of about). -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting. document :: Roots -> XMLs document doc = -- (`S.evalState` def) $ case Seq.viewl doc of sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot -> let (titles, content) = partitionSection sec in case Seq.viewl titles of (unTree -> Cell bt et _) :< _ -> xmlify def { inh_titles = titles , inh_figure = True } contentWithAbout <> xmlify def foot where contentWithAbout = case Seq.findIndexL isAbout content of Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content Just{} -> content isAbout = \case (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True _ -> False _ -> xmlify def doc _ -> xmlify def doc partitionSection :: Root -> (Roots, Roots) partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) = case Seq.viewl body of EmptyL -> mempty title@(unTree -> Cell _bt et NodePara) :< rest -> let (subtitles, content) = spanlSubtitles et rest in (title <| (subtitles >>= subTrees), content) where spanlSubtitles ep ts = case Seq.viewl ts of sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs | lvlSub <= lvlPar , pos_line bs - pos_line ep <= 1 -> let (subs, ts') = spanlSubtitles es rs in (sub <| subs, ts') _ -> (mempty, ts) _ -> (mempty, body) partitionSection _ = mempty -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_para :: [Inh -> Root -> XML] , inh_titles :: Roots } instance Default Inh where def = Inh { inh_figure = False , inh_para = List.repeat elementPara , inh_titles = mempty } -- ** 'inh_para' elementPara :: Inh -> Root -> XML elementPara inh (Tree c ts) = Tree (XmlElem "para" <$ c) $ xmlify inh ts elementTitle :: Inh -> Root -> XML elementTitle inh (Tree c ts) = Tree (XmlElem "title" <$ c) $ xmlify inh ts elementName :: Inh -> Root -> XML elementName inh (Tree c ts) = Tree (XmlElem "name" <$ c) $ xmlify inh ts attributeName :: Inh -> Root -> XML attributeName _inh (Tree c ts) = tree0 (XmlAttr "name" (Plain.document ts) <$ c) -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh roots = case Seq.viewl roots of EmptyL -> mempty r@(Tree cr@(Cell _br _er nr) ts) :< rs -> case nr of ---------------------- -- NOTE: HeaderColon becomes parent -- of any continuous following-sibling HeaderBar or HeaderGreat NodeHeader (HeaderColon n _wh) | (span, rest) <- spanlHeaderColon rs , not $ null span -> xmlify inh (Tree cr (ts<>span)) <> xmlify inh rest where spanlHeaderColon :: Roots -> (Roots, Roots) spanlHeaderColon = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n _ -> False ---------------------- -- NOTE: gather HeaderBrackets NodeHeader HeaderBrackets{} | (span,rest) <- spanlBrackets roots , not (null span) -> (<| xmlify inh rest) $ element "references" $ span >>= xmlify inh where spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True _ -> False ---------------------- -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case. NodeText x | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs -> xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs' ---------------------- -- NOTE: detect [some text](http://some.url) or [SomeRef] NodePair PairParen | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs -> (<| xmlify inh rs') $ case bracket of (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) -> element "eref" $ xmlAttrs [Cell bl el ("to",lnk)] <> xmlify inh ts _ -> element "rref" $ xmlAttrs [Cell bb eb ("to",Plain.document bracket)] <> xmlify inh ts ---------------------- -- NOTE: gather HeaderDash _ | (span, rest) <- spanlItems (==HeaderDash) roots , not $ null span -> (<| xmlify inh rest) $ element "ul" $ span >>= xmlify inh{inh_para=List.repeat elementPara} ---------------------- -- NOTE: gather HeaderDot | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots , not $ null span -> (<| xmlify inh rest) $ element "ol" $ span >>= xmlify inh{inh_para=List.repeat elementPara} where spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader = Seq.spanl $ \(unTree -> (unCell -> nod)) -> case nod of NodeHeader (HeaderColon "li" _wh) -> True NodeHeader hdr -> liHeader hdr NodePair (PairElem "li" _as) -> True _ -> False ---------------------- NodePara | para:inh_para <- inh_para inh -> para inh r <| -- para (() <$ cr) (xmlify inh ts) <| xmlify inh{inh_para} rs ---------------------- -- NOTE: context-free Root _ -> xmlify inh r <> xmlify inh rs where element :: XmlName -> XMLs -> XML element n = Tree (XmlElem n <$ cr) instance Xmlify Root where xmlify inh tn@(Tree (Cell bn en nod) ts) = case nod of ---------------------- NodePara -> case inh_para inh of [] -> xmlify inh ts para:_ -> Seq.singleton $ para inh tn -- para (() <$ cn) $ xmlify inh ts ---------------------- NodeHeader hdr -> case hdr of -- HeaderSection{} -> Seq.singleton $ element "section" $ head <> xmlify inh' body where (titles, content) = partitionSection tn (attrs, body) = partitionAttrs content head = case Seq.viewl titles of EmptyL -> mempty title@(unTree -> ct) :< subtitles -> xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <> xmlify inh{inh_para=List.repeat elementTitle} title <> aliases where aliases = subtitles >>= \subtitle@(unTree -> cs) -> return $ Tree (cs $> XmlElem "alias") $ xmlAttrs [cs $> ("id",getAttrId subtitle)] inh' = inh { inh_para = List.repeat elementPara , inh_figure = True } -- HeaderColon n _wh -> let (attrs,body) = partitionAttrs ts in case n of -- NOTE: insert titles into . "about" -> Seq.singleton $ element "about" $ xmlify inh' (inh_titles inh) <> xmlAttrs attrs <> xmlify inh'{inh_figure=False} body -- NOTE: in
mode, unreserved elements become
_ | inh_figure inh && n`List.notElem`elems || TL.null n -> Seq.singleton $ element "figure" $ -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <> xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <> case toList body of [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body -- NOTE: reserved elements _ -> Seq.singleton $ element (xmlLocalName n) $ xmlAttrs attrs <> xmlify inh' body where inh' = inh { inh_para = case n of "about" -> List.repeat elementTitle "reference" -> elementTitle : List.repeat elementPara "serie" -> List.repeat attributeName "author" -> List.repeat attributeName "editor" -> List.repeat attributeName "org" -> List.repeat attributeName "note" -> List.repeat elementPara _ -> [] } -- HeaderBar n wh -> if inh_figure inh && n`List.notElem`elems || TL.null n then Seq.singleton $ element "artwork" $ xmlAttrs (Seq.singleton $ Cell bn bn ("type", n)) <> xmlify inh{inh_para=[]} ts else xmlify inh $ Tree (Cell bn en $ NodeHeader $ HeaderColon n wh) ts -- HeaderGreat n _wh -> Seq.singleton $ let (attrs,body) = partitionAttrs ts in element "quote" $ xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <> xmlify inh{inh_para=List.repeat elementPara} body -- HeaderEqual n _wh -> Seq.singleton $ Tree0 $ cell $ XmlAttr (xmlLocalName n) $ Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts -- HeaderDot n -> Seq.singleton $ element "li" $ xmlAttrs (Seq.singleton $ Cell bn bn{pos_column=pos_column bn + int (TL.length n)} ("name", n)) <> xmlify inh ts -- HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $ Plain.document ts -- HeaderBrackets ident -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element "reference" $ xmlAttrs (setAttr (Cell en en ("id",ident)) attrs) <> xmlify inh'{inh_para = elementTitle : elementTitle : List.repeat elementPara} body where inh' = inh{inh_figure = False} -- HeaderDotSlash p -> Seq.singleton $ element "include" $ xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <> xmlify inh ts ---------------------- NodePair pair -> case pair of PairBracket | to <- Plain.document ts , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to -> Seq.singleton $ element "rref" $ xmlAttrs [cell ("to",to)] PairStar -> Seq.singleton $ element "b" $ xmlify inh ts PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts PairFrenchquote -> Seq.singleton $ element "q" $ xmlify inh ts {- case ts of (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) -> case Seq.viewr ls of m :> Tree0 (Cell br er (TokenPlain r)) -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))) <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))) _ -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) -> xmlify inh $ rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r))) _ -> xmlify inh ts -} PairHash -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",Plain.document ts)] PairElem name attrs -> Seq.singleton $ element (xmlLocalName name) $ xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) -> cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> Seq.singleton (Tree0 $ Cell bn bn' $ XmlText open) `unionXml` xmlify inh ts `unionXml` Seq.singleton (Tree0 $ Cell en' en $ XmlText close) where (open, close) = pairBorders pair ts bn' = bn{pos_column=pos_column bn + int (TL.length open)} en' = en{pos_column=pos_column bn - int (TL.length close)} ---------------------- NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t ---------------------- NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)] TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)] ---------------------- NodeLower n as -> Seq.singleton $ element "artwork" $ xmlify inh ts where cell :: a -> Cell a cell = Cell bn en element :: XmlName -> XMLs -> XML element n = Tree (cell $ XmlElem n) instance Xmlify (Seq (Cell (XmlName,TL.Text))) where xmlify _inh = xmlAttrs -- * Elements -- | Reserved elements' name elems :: Set TL.Text elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "audio" , "author" , "authors" , "bcp14" , "br" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "region" , "rref" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tbody" , "td" , "tel" , "tfoot" , "title" , "th" , "thead" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ] -- * Attributes -- | Convenient alias, forcing the types xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>) -- | Extract attributes partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots) partitionAttrs ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n _ -> False attrs = attr <$> as attr = \case Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a -> Cell bp ep (xmlLocalName n, v) where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a _ -> undefined getAttrId :: Root -> TL.Text getAttrId = Plain.document . Seq.singleton setAttr :: Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) -> Seq (Cell (XmlName, TL.Text)) setAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defaultAttr :: Seq (Cell (XmlName, TL.Text)) -> Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) defaultAttr as a@(unCell -> (k, _v)) = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as -- * Text -- | Unify two 'XMLs', merging border 'XmlText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of ( Tree0 (Cell bx ex (XmlText tx)) , Tree0 (Cell by ey (XmlText ty)) ) -> xs `unionXml` Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x unionsXml :: Foldable f => f XMLs -> XMLs unionsXml = foldl' unionXml mempty