{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.TCT.Write.XML where import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$), ($>)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|)) import Data.Set (Set) import Data.TreeSeq.Strict (Tree(..)) import Data.Tuple (uncurry) import Prelude (Num(..), undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Language.TCT.Write.Plain as Plain import qualified System.FilePath as FP -- import Language.TCT.Debug import Language.TCT hiding (Parser) import Language.XML import Text.Blaze.XML () -- | Main entry point -- -- NOTE: 'XmlNode' are still annotated with 'Cell', -- but nothing is done to preserve any ordering amongst them, -- because 'Node's sometimes need to be reordered -- (eg. about/title may have a title from the section before, -- hence outside of about). -- Still holding: @'cell_begin' < 'cell_end'@ and 'Cell' nesting. document :: Roots -> XMLs document doc = -- (`S.evalState` def) $ case Seq.viewl doc of sec@(Tree (unCell -> NodeHeader HeaderSection{}) _body) :< foot -> let (titles, content) = partitionSection sec in case Seq.viewl titles of (unTree -> Cell bt et _) :< _ -> xmlify def { inh_titles = titles , inh_figure = True } contentWithAbout <> xmlify def foot where contentWithAbout = case Seq.findIndexL isAbout content of Nothing -> Tree (Cell bt et $ NodeHeader $ HeaderColon "about" "") mempty <| content Just{} -> content isAbout = \case (unTree -> (unCell -> NodeHeader (HeaderColon "about" _wh))) -> True _ -> False _ -> xmlify def doc _ -> xmlify def doc partitionSection :: Root -> (Roots, Roots) partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) = case Seq.viewl body of EmptyL -> mempty title@(unTree -> Cell _bt et NodePara) :< rest -> let (subtitles, content) = spanlSubtitles et rest in (title <| (subtitles >>= subTrees), content) where spanlSubtitles ep ts = case Seq.viewl ts of sub@(unTree -> Cell bs es (NodeHeader (HeaderSection lvlSub))) :< rs | lvlSub <= lvlPar , pos_line bs - pos_line ep <= 1 -> let (subs, ts') = spanlSubtitles es rs in (sub <| subs, ts') _ -> (mempty, ts) _ -> (mempty, body) partitionSection _ = mempty -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_para :: [Cell () -> XMLs -> XML] , inh_titles :: Roots } instance Default Inh where def = Inh { inh_figure = False , inh_para = List.repeat xmlPara , inh_titles = mempty } -- ** 'inh_para' xmlPara :: Cell a -> XMLs -> XML xmlPara c = Tree (XmlElem "para" <$ c) xmlTitle :: Cell a -> XMLs -> XML xmlTitle c = Tree (XmlElem "title" <$ c) xmlName :: Cell a -> XMLs -> XML xmlName c = Tree (XmlElem "name" <$ c) -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh roots = case Seq.viewl roots of EmptyL -> mempty r@(Tree cr@(Cell _br _er nr) ts) :< rs -> case nr of ---------------------- -- NOTE: HeaderColon becomes parent -- of any continuous following-sibling HeaderBar or HeaderGreat NodeHeader (HeaderColon n _wh) | (span, rest) <- spanlHeaderColon rs , not $ null span -> xmlify inh $ Tree cr (ts<>span) <| rest where spanlHeaderColon :: Roots -> (Roots, Roots) spanlHeaderColon = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n _ -> False ---------------------- -- NOTE: gather HeaderBrackets NodeHeader HeaderBrackets{} | (span,rest) <- spanlBrackets roots , not (null span) -> (<| xmlify inh rest) $ element "references" $ span >>= xmlify inh where spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True _ -> False ---------------------- -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case. NodeText x | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs -> xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs' ---------------------- -- NOTE: detect [some text](http://some.url) or [SomeRef] NodePair PairParen | Tree (Cell bb eb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs -> (<| xmlify inh rs') $ case bracket of (toList -> [unTree -> Cell bl el (NodeToken (TokenLink lnk))]) -> element "eref" $ xmlAttrs [Cell bl el ("to",lnk)] <> xmlify inh ts _ -> element "rref" $ xmlAttrs [Cell bb eb ("to",Plain.document bracket)] <> xmlify inh ts ---------------------- -- NOTE: gather HeaderDash _ | (span, rest) <- spanlItems (==HeaderDash) roots , not $ null span -> (<| xmlify inh rest) $ element "ul" $ span >>= xmlify inh{inh_para=List.repeat xmlPara} ---------------------- -- NOTE: gather HeaderDot | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots , not $ null span -> (<| xmlify inh rest) $ element "ol" $ span >>= xmlify inh{inh_para=List.repeat xmlPara} where spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader = Seq.spanl $ \(unTree -> (unCell -> nod)) -> case nod of NodeHeader (HeaderColon "li" _wh) -> True NodeHeader hdr -> liHeader hdr NodePair (PairElem "li" _as) -> True _ -> False ---------------------- NodePara | para:inh_para <- inh_para inh -> para (() <$ cr) (xmlify inh ts) <| xmlify inh{inh_para} rs ---------------------- -- NOTE: context-free Root _ -> xmlify inh r <> xmlify inh rs where element :: XmlName -> XMLs -> XML element n = Tree (XmlElem n <$ cr) {- t@(Tree (NodePair (PairElem))) :< ts -> case inh_para inh of [] -> xmlify inh t <> go inh ts _ | isTokenElem toks -> xmlify inh t <> go inh ts tree0:inh_para -> (case Seq.viewl toks of EmptyL -> id (unTree -> cell_begin -> bp) :< _ -> (tree0 bp (xmlify inh t) <|)) $ go inh{inh_para} ts -} instance Xmlify Root where xmlify inh tn@(Tree cn@(Cell bn en nod) ts) = case nod of NodeGroup -> xmlify inh ts ---------------------- NodePara -> case inh_para inh of [] -> xmlify inh ts para:_ -> Seq.singleton $ para (() <$ cn) $ xmlify inh ts ---------------------- NodeHeader hdr -> case hdr of -- HeaderSection{} -> Seq.singleton $ element "section" $ head <> xmlify inh' body where (titles, content) = partitionSection tn (attrs, body) = partitionAttrs content head = case Seq.viewl titles of EmptyL -> mempty title@(unTree -> ct) :< subtitles -> xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <> xmlify inh{inh_para=List.repeat xmlTitle} title <> aliases where aliases = subtitles >>= \subtitle@(unTree -> cs) -> return $ Tree (cs $> XmlElem "alias") $ xmlAttrs [cs $> ("id",getAttrId subtitle)] inh' = inh { inh_para = List.repeat xmlPara , inh_figure = True } -- HeaderColon n _wh -> let (attrs,body) = partitionAttrs ts in case n of -- NOTE: insert titles into . "about" -> Seq.singleton $ element "about" $ xmlify inh' (inh_titles inh) <> xmlAttrs attrs <> xmlify inh' body -- NOTE: in
mode, unreserved nodes become
_ | inh_figure inh && n`List.notElem`elems || TL.null n -> Seq.singleton $ element "figure" $ -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <> xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <> case toList body of [Tree0{}] -> xmlify inh'{inh_para = List.repeat xmlPara} body _ -> xmlify inh'{inh_para = xmlTitle : List.repeat xmlPara} body -- NOTE: reserved nodes _ -> Seq.singleton $ element (xmlLocalName n) $ xmlAttrs attrs <> xmlify inh' body where inh' = inh { inh_para = case n of "about" -> List.repeat xmlTitle "reference" -> xmlTitle : List.repeat xmlPara "serie" -> List.repeat xmlName "author" -> List.repeat xmlName "editor" -> List.repeat xmlName "org" -> List.repeat xmlName _ -> [] } -- HeaderBar n _wh -> Seq.singleton $ element "artwork" $ xmlAttrs (Seq.singleton $ Cell bn bn ("type", n)) <> xmlify inh{inh_para=[]} ts -- HeaderGreat n _wh -> Seq.singleton $ let (attrs,body) = partitionAttrs ts in element "quote" $ xmlAttrs (attrs `defaultAttr` Cell bn bn ("type", n)) <> xmlify inh{inh_para=List.repeat xmlPara} body -- HeaderEqual n _wh -> Seq.singleton $ Tree0 $ cell $ XmlAttr (xmlLocalName n) $ Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts -- HeaderDot _n -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XmlComment $ -- debug1_ ("TS", ts) $ -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $ Plain.document ts -- Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing {- TreeSeq.mapAlsoNode (cell1 . unCell) (\_k -> fmap $ TreeSeq.mapAlsoNode (cell1 . unCell) (\_k' -> cell1 . unCell)) <$> ts -} -- HeaderBrackets ident -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element "reference" $ xmlAttrs (setAttr (Cell en en ("id",ident)) attrs) <> xmlify inh'{inh_para = xmlTitle : xmlTitle : List.repeat xmlPara} body where inh' = inh{inh_figure = False} -- HeaderDotSlash p -> Seq.singleton $ element "include" $ xmlAttrs [cell ("href",TL.pack $ FP.replaceExtension p "dtc")] <> xmlify inh ts ---------------------- NodePair pair -> case pair of PairBracket | to <- Plain.document ts , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to -> Seq.singleton $ element "rref" $ xmlAttrs [cell ("to",to)] PairStar -> Seq.singleton $ element "b" $ xmlify inh ts PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts PairFrenchquote -> Seq.singleton $ element "q" $ xmlify inh ts {- case ts of (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) -> case Seq.viewr ls of m :> Tree0 (Cell br er (TokenPlain r)) -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))) <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))) _ -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) -> xmlify inh $ rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r))) _ -> xmlify inh ts -} PairHash -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",Plain.document ts)] PairElem name attrs -> Seq.singleton $ element (xmlLocalName name) $ xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) -> cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> let (open, close) = pairBorders pair ts in Seq.singleton (Tree0 $ Cell bn bn $ XmlText open) `unionXml` xmlify inh ts `unionXml` Seq.singleton (Tree0 $ Cell en en $ XmlText close) ---------------------- NodeText t -> Seq.singleton $ Tree0 $ cell $ XmlText t ---------------------- NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ TL.singleton c TokenText t -> Seq.singleton $ Tree0 $ cell $ XmlText t TokenTag t -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",t)] TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)] ---------------------- NodeLower n as -> Seq.singleton $ element "artwork" $ xmlify inh ts where cell :: a -> Cell a cell = Cell bn en element :: XmlName -> XMLs -> XML element n = Tree (cell $ XmlElem n) instance Xmlify (Seq (Cell (XmlName,TL.Text))) where xmlify _inh = xmlAttrs -- * Elements -- | Reserved elements' name elems :: Set TL.Text elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "audio" , "author" , "authors" , "bcp14" , "br" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "region" , "rref" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tbody" , "td" , "tel" , "tfoot" , "title" , "th" , "thead" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ] -- * Attributes -- | Convenient alias, forcing the types xmlAttrs :: Seq (Cell (XmlName,TL.Text)) -> XMLs xmlAttrs = (Tree0 . (uncurry XmlAttr <$>) <$>) -- | Extract attributes partitionAttrs :: Roots -> (Seq (Cell (XmlName, TL.Text)), Roots) partitionAttrs ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n _ -> False attrs = attr <$> as attr = \case Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a -> Cell bp ep (xmlLocalName n, v) where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a _ -> undefined getAttrId :: Root -> TL.Text getAttrId = Plain.document . Seq.singleton setAttr :: Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) -> Seq (Cell (XmlName, TL.Text)) setAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defaultAttr :: Seq (Cell (XmlName, TL.Text)) -> Cell (XmlName, TL.Text) -> Seq (Cell (XmlName, TL.Text)) defaultAttr as a@(unCell -> (k, _v)) = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as -- * Text -- | Unify two 'XMLs', merging border 'XmlText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of ( Tree0 (Cell bx ex (XmlText tx)) , Tree0 (Cell by ey (XmlText ty)) ) -> xs `unionXml` Seq.singleton (Tree0 $ (XmlText <$>) $ Cell bx ex tx <> Cell by ey ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x unionsXml :: Foldable f => f XMLs -> XMLs unionsXml = foldl' unionXml mempty