{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Hdoc.TCT.Write.XML where import Control.Monad (Monad(..)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (Foldable(..)) import Data.Function (($), (.)) import Data.Functor ((<$>), (<$), ($>)) import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (Maybe(..)) import Data.Monoid (Monoid(..)) import Data.Ord (Ord(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>)) import Data.Set (Set) import Data.TreeSeq.Strict (Tree(..), tree0) import Data.Tuple (uncurry) import Prelude (Num(..), undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text.Lazy as TL import qualified Hdoc.TCT.Write.Plain as Plain -- import Hdoc.TCT.Debug import Hdoc.TCT.Utils import Hdoc.TCT as TCT hiding (Parser) import Hdoc.XML (XML, XMLs) import qualified Hdoc.XML as XML import Text.Blaze.XML () -- | Main entry point -- -- NOTE: 'XmlNode' are still annotated with 'Cell', -- but nothing is done to preserve any ordering amongst them, -- because 'Node's sometimes need to be reordered -- (eg. about/title may have a title from the section before, -- hence outside of about). writeXML :: Roots -> XMLs writeXML (tn@(Tree (Cell ss (NodeHeader (HeaderSection 1))) _ts) Seq.:<| rs) = element "head" (xmlifySection def tn) <| xmlify def rs where element :: XML.Name -> XMLs -> XML element n = Tree (Cell ss $ XML.NodeElem n) writeXML roots = xmlify def roots -- | Generate the content of
or . xmlifySection :: Inh -> Root -> XMLs xmlifySection inh tn@(Tree (Cell ss _nt) _ts) = about <> xmlify inh' body where element :: XML.Name -> XMLs -> XML element n = Tree (Cell ss $ XML.NodeElem n) inh' = inh { inh_para = List.repeat elementPara , inh_figure = True } (titles, content) = partitionSection tn (attrs, body) = partitionAttrs content about = case Seq.viewl titles of EmptyL -> mempty title@(unTree -> ct) :< subtitles -> (xmlAttrs (attrs `defaultAttr` (ct $> ("id",getAttrId title))) <>) $ Seq.singleton $ element "about" $ xmlify inh{inh_para=List.repeat elementTitle} title <> aliases where aliases = subtitles >>= \subtitle@(unTree -> cs) -> return $ Tree (cs $> XML.NodeElem "alias") $ xmlAttrs [ct $> ("id",getAttrId subtitle)] <> xmlify inh{inh_para=List.repeat elementTitle} subtitle -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_para :: [Inh -> Root -> XML] } instance Default Inh where def = Inh { inh_figure = False , inh_para = List.repeat elementPara } -- ** 'inh_para' elementPara :: Inh -> Root -> XML elementPara inh (Tree c ts) = Tree (XML.NodeElem "para" <$ c) $ xmlify inh ts elementTitle :: Inh -> Root -> XML elementTitle inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlify inh ts elementTitleWith :: Attrs -> Inh -> Root -> XML elementTitleWith attrs inh (Tree c ts) = Tree (XML.NodeElem "title" <$ c) $ xmlAttrs attrs <> xmlify inh ts elementName :: Inh -> Root -> XML elementName inh (Tree c ts) = Tree (XML.NodeElem "name" <$ c) $ xmlify inh ts attributeName :: Inh -> Root -> XML attributeName _inh (Tree c ts) = tree0 (XML.NodeAttr "name" (Plain.writePlain ts) <$ c) attributeId :: Inh -> Root -> XML attributeId _inh (Tree c ts) = tree0 (XML.NodeAttr "id" (Plain.writePlain ts) <$ c) -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh roots = case Seq.viewl roots of EmptyL -> mempty r@(Tree cr@(Cell _sr nr) ts) :< rs -> case nr of ---------------------- -- NOTE: HeaderColon becomes parent -- of any continuous following-sibling HeaderBar or HeaderGreat NodeHeader (HeaderColon n _wh) | (span, rest) <- spanlHeaderColon rs , not $ null span -> xmlify inh (Tree cr (ts<>span)) <> xmlify inh rest where spanlHeaderColon :: Roots -> (Roots, Roots) spanlHeaderColon = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar m _)) _ -> m == n Tree (unCell -> NodeHeader (HeaderGreat m _)) _ -> m == n _ -> False ---------------------- -- NOTE: gather HeaderBrackets NodeHeader HeaderBrackets{} | (span,rest) <- spanlBrackets roots , not (null span) -> (<| xmlify inh rest) $ element "references" $ span >>= xmlify inh where spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True _ -> False ---------------------- -- NOTE: merge adjacent NodeText-s, shouldn't be needed, but just in case. NodeText x | Tree (cy@(unCell -> NodeText y)) ys :< rs' <- Seq.viewl rs -> xmlify inh $ Tree (NodeText <$> (x <$ cr) <> (y <$ cy)) (ts <> ys) <| rs' ---------------------- -- NOTE: detect (some text)[http://some.url] or (some text)[SomeRef] NodePair PairParen | Tree (Cell sb (NodePair PairBracket)) bracket :< rs' <- Seq.viewl rs -> (<| xmlify inh rs') $ case bracket of (toList -> [unTree -> Cell sl (NodeToken (TokenLink lnk))]) -> element "eref" $ xmlAttrs [Cell sl ("to",lnk)] <> xmlify inh ts _ -> element "ref" $ xmlAttrs [Cell sb ("to",Plain.writePlain bracket)] <> if null ts -- NOTE: preserve empty parens then Seq.singleton $ tree0 (XML.NodeText "" <$ cr) else xmlify inh ts ---------------------- -- NOTE: gather HeaderDash _ | (span, rest) <- spanlItems (==HeaderDash) roots , not $ null span -> (<| xmlify inh rest) $ element "ul" $ span >>= xmlify inh{inh_para=List.repeat elementPara} ---------------------- -- NOTE: gather HeaderDot | (span,rest) <- spanlItems (\case HeaderDot{} -> True; _ -> False) roots , not $ null span -> (<| xmlify inh rest) $ element "ol" $ span >>= xmlify inh{inh_para=List.repeat elementPara} where spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader = Seq.spanl $ \(unTree -> (unCell -> nod)) -> case nod of NodeHeader (HeaderColon "li" _wh) -> True NodeHeader hdr -> liHeader hdr NodePair (PairElem "li" _as) -> True _ -> False ---------------------- NodePara | para:inh_para <- inh_para inh -> para inh r <| xmlify inh{inh_para} rs ---------------------- -- NOTE: context-free Root _ -> xmlify inh r `unionXml` xmlify inh rs where element :: XML.Name -> XMLs -> XML element n = Tree (XML.NodeElem n <$ cr) instance Xmlify Root where xmlify inh tn@(Tree (Cell ss@(sn:|ssn) nod) ts) = case nod of ---------------------- NodePara -> case inh_para inh of [] -> xmlify inh ts para:_ -> Seq.singleton $ para inh tn ---------------------- NodeHeader hdr -> case hdr of -- HeaderSection{} -> Seq.singleton $ element "section" $ xmlifySection inh tn -- HeaderColon n _wh -> let (attrs,body) = partitionAttrs ts in case n of -- NOTE: disable 'inh_figure' "about" -> Seq.singleton $ element "about" $ xmlAttrs attrs <> xmlify inh'{inh_figure=False} body -- NOTE: handle judgment _ | n`List.elem`elemsJudgment -> -- FIXME: not a special case so far. Seq.singleton $ element (XML.localName n) $ xmlAttrs attrs <> xmlify inh'' body where inh'' = inh' { inh_para = case n of "grades" -> List.repeat attributeId "judges" -> List.repeat attributeId _ -> List.repeat elementTitle } -- NOTE: in
mode, unreserved elements become
_ | inh_figure inh && n`List.notElem`elems || TL.null n -> Seq.singleton $ element "figure" $ -- xmlAttrs (setAttr (Cell en en ("type",n)) attrs) <> xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_begin sn}:|ssn) ("type", n)) <> case toList body of [Tree0{}] -> xmlify inh'{inh_para = List.repeat elementPara} body _ -> xmlify inh'{inh_para = elementTitle : List.repeat elementPara} body -- NOTE: reserved elements _ -> Seq.singleton $ element (XML.localName n) $ xmlAttrs attrs <> xmlify inh' body where inh' = inh { inh_para = case n of "about" -> List.repeat elementTitle "reference" -> List.repeat elementTitle "serie" -> List.repeat attributeName "author" -> List.repeat attributeName "editor" -> List.repeat attributeName "org" -> List.repeat attributeName "note" -> List.repeat elementPara _ -> [] } -- HeaderBar n wh -> if inh_figure inh && n`List.notElem`elems || TL.null n then Seq.singleton $ element "artwork" $ xmlAttrs (Seq.singleton $ Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <> xmlify inh{inh_para=[]} ts else xmlify inh $ Tree (cell $ NodeHeader $ HeaderColon n wh) ts -- HeaderGreat n _wh -> Seq.singleton $ let (attrs,body) = partitionAttrs ts in element "quote" $ xmlAttrs (attrs `defaultAttr` Cell (sn{span_end=span_end sn}:|ssn) ("type", n)) <> xmlify inh{inh_para=List.repeat elementPara} body -- HeaderEqual n _wh -> Seq.singleton $ Tree0 $ cell $ XML.NodeAttr (XML.localName n) $ Plain.text (Plain.setStart ts def{Plain.state_escape = False}) ts -- HeaderDot n -> Seq.singleton $ element "li" $ let span_end = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length n)} in xmlAttrs (Seq.singleton $ Cell (sn{span_end}:|ssn) ("name", n)) <> xmlify inh ts -- HeaderDash -> Seq.singleton $ element "li" $ xmlify inh ts -- HeaderDashDash -> Seq.singleton $ Tree0 $ cell $ XML.NodeComment $ Plain.writePlain ts -- HeaderBrackets ident -> let (attrs,body) = partitionAttrs ts in Seq.singleton $ element "reference" $ xmlAttrs (setAttr (Cell (sn{span_end=span_end sn}:|ssn) ("id",ident)) attrs) |> element "about" ( xmlify inh'{inh_para = List.repeat elementTitle} body ) where inh' = inh{inh_figure = False} -- HeaderDotSlash _file -> xmlify inh ts ---------------------- NodePair pair -> case pair of PairBracket | to <- Plain.writePlain ts , TL.all (\c -> c/='[' && c/=']' && Char.isPrint c && not (Char.isSpace c)) to -> Seq.singleton $ element "ref" $ xmlAttrs [cell ("to",to)] PairStar -> Seq.singleton $ element "b" $ xmlify inh ts PairDash -> Seq.singleton $ element "del" $ xmlify inh ts PairUnderscore -> Seq.singleton $ element "u" $ xmlify inh ts PairSlash -> Seq.singleton $ element "i" $ xmlify inh ts PairBackquote -> Seq.singleton $ element "code" $ xmlify inh ts PairFrenchquote -> Seq.singleton $ element "q" $ case ts of (Seq.viewl -> Tree0 (Cell sl (NodeToken (TokenText l))) :< ls) -> case Seq.viewr ls of m :> Tree0 (Cell sr (NodeToken (TokenText r))) -> xmlify inh $ Tree0 (Cell sl (NodeToken (TokenText (TL.dropWhile Char.isSpace l)))) Seq.<|(m Seq.|>Tree0 (Cell sr (NodeToken (TokenText (TL.dropWhileEnd Char.isSpace r))))) _ -> xmlify inh $ Tree0 (Cell sl (NodeToken (TokenText (TL.dropAround Char.isSpace l)))) Seq.<| ls (Seq.viewr -> rs :> Tree0 (Cell sr (NodeToken (TokenText r)))) -> xmlify inh $ rs Seq.|> Tree0 (Cell sr (NodeToken (TokenText (TL.dropAround Char.isSpace r)))) _ -> xmlify inh ts PairTag isBackref -> Seq.singleton $ element (if isBackref then "tag-back" else "tag") $ xmlAttrs [cell ("to",Plain.writePlain ts)] -- xmlAttrs [cell ("to",to)] -- xmlify inh{inh_para=[]} ts -- xmlAttrs [cell ("to",Plain.writePlain ts)] PairAt isBackref -> Seq.singleton $ element (if isBackref then "at-back" else "at") $ xmlAttrs [cell ("to",Plain.writePlain ts)] PairElem name attrs -> Seq.singleton $ element (XML.localName name) $ xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) -> cell (XML.localName elemAttr_name,elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> Seq.singleton (Tree0 $ Cell (sn{span_end=bn'}:|ssn) $ XML.NodeText open) `unionXml` xmlify inh ts `unionXml` Seq.singleton (Tree0 $ Cell (sn{span_begin=en'}:|ssn) $ XML.NodeText close) where (open, close) = pairBorders pair ts bn' = (span_begin sn){pos_column=pos_column (span_begin sn) + int (TL.length open)} en' = (span_end sn){pos_column=pos_column (span_end sn) - int (TL.length close)} ---------------------- NodeText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t ---------------------- NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XML.NodeText $ TL.singleton c TokenText t -> Seq.singleton $ Tree0 $ cell $ XML.NodeText t TokenAt b to -> Seq.singleton $ element (if b then "at-back" else "at") $ xmlAttrs [cell ("to",to)] -- Seq.singleton $ Tree0 $ cell $ XML.NodeText to TokenTag b to -> Seq.singleton $ element (if b then "tag-back" else "tag") $ xmlAttrs [cell ("to",to)] -- Seq.singleton $ Tree0 $ cell $ XML.NodeText to TokenLink lnk -> Seq.singleton $ element "eref" $ xmlAttrs [cell ("to",lnk)] ---------------------- NodeLower n as -> Seq.singleton $ element "artwork" $ xmlify inh ts where cell :: a -> Cell a cell = Cell ss element :: XML.Name -> XMLs -> XML element n = Tree (cell $ XML.NodeElem n) {- instance Xmlify (Seq (Cell (XML.Name,TL.Text))) where xmlify _inh = xmlAttrs -} -- * Elements -- | Reserved elements' name elems :: Set TL.Text -- TODO: use a @data@ or maybe a @data family@ instead of 'TL.Text' elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "at" , "at-back" , "audio" , "author" , "authors" , "bcp14" , "br" , "break" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "default" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "head" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "refs" , "region" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tag" , "tag-back" , "tbody" , "td" , "tel" , "tfoot" , "th" , "thead" , "title" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ] elemsJudgment :: Set TL.Text elemsJudgment = [ "choice" , "grade" , "grades" , "judge" , "judges" , "judgment" , "opinion" ] -- * Attributes type Attrs = Seq (Cell (XML.Name, TL.Text)) -- | Convenient alias, forcing the types xmlAttrs :: Attrs -> XMLs xmlAttrs = (Tree0 . (uncurry XML.NodeAttr <$>) <$>) -- | Extract section titles partitionSection :: Root -> (Roots, Roots) partitionSection (Tree (unCell -> NodeHeader (HeaderSection lvlPar)) body) = case Seq.viewl body of EmptyL -> mempty title@(unTree -> Cell (Span{span_end=et}:|_) NodePara) :< rest -> let (subtitles, content) = spanlSubtitles et rest in (title <| (subtitles >>= subTrees), content) where spanlSubtitles ep ts = case Seq.viewl ts of sub@(unTree -> Cell (Span{..}:|_) (NodeHeader (HeaderSection lvlSub))) :< rs | lvlSub <= lvlPar , pos_line span_begin - pos_line ep <= 1 -> let (subs, ts') = spanlSubtitles span_end rs in (sub <| subs, ts') _ -> (mempty, ts) _ -> (mempty, body) partitionSection _ = mempty -- | Extract attributes partitionAttrs :: Roots -> (Seq (Cell (XML.Name, TL.Text)), Roots) partitionAttrs ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unCell -> NodeHeader (HeaderEqual n _wh)) _cs -> not $ TL.null n _ -> False attrs = attr <$> as attr = \case Tree (Cell loc (NodeHeader (HeaderEqual n _wh))) a -> Cell loc (XML.localName n, v) where v = Plain.text (Plain.setStart a def{Plain.state_escape = False}) a _ -> undefined getAttrId :: Root -> TL.Text getAttrId = Plain.writePlain . Seq.singleton setAttr :: Cell (XML.Name, TL.Text) -> Seq (Cell (XML.Name, TL.Text)) -> Seq (Cell (XML.Name, TL.Text)) setAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defaultAttr :: Seq (Cell (XML.Name, TL.Text)) -> Cell (XML.Name, TL.Text) -> Seq (Cell (XML.Name, TL.Text)) defaultAttr as a@(unCell -> (k, _v)) = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as -- * Text -- | Unify two 'XMLs', merging border 'XML.NodeText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of ( Tree0 (Cell ssx@(Span {span_file=fx}:|_sx) (XML.NodeText tx)) , Tree0 (Cell ssy@(Span {span_file=fy}:|_sy) (XML.NodeText ty)) ) | fx == fy -> xs `unionXml` Seq.singleton (Tree0 $ (XML.NodeText <$>) $ Cell ssx tx <> Cell ssy ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x unionsXml :: Foldable f => f XMLs -> XMLs unionsXml = foldl' unionXml mempty