{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.TCT.Write.XML where import Control.Arrow (first) import Control.Monad (Monad(..), (=<<)) import Data.Bool import Data.Eq (Eq(..)) import Data.Foldable (null, foldl', any) import Data.Function (($), (.), id) import Data.Functor (Functor(..), (<$>)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>)) import Data.Set (Set) import Data.Text (Text) import Data.TreeSeq.Strict (Tree(..)) import GHC.Exts (toList) import Prelude (undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Language.TCT.Write.Text as Write import qualified System.FilePath as FP import Text.Blaze.XML () import Language.TCT hiding (Parser) import Language.XML import qualified Data.TreeSeq.Strict as TreeSeq -- * Type 'InhXml' data InhXml = InhXml { inhXml_figure :: Bool , inhXml_tree0 :: [Pos -> XMLs -> XML] , inhXml_titles :: Seq Tokens } inhXml :: InhXml inhXml = InhXml { inhXml_figure = False , inhXml_tree0 = [] , inhXml_titles = mempty } mimetype :: Text -> Maybe Text mimetype "hs" = Just "text/x-haskell" mimetype "sh" = Just "text/x-shellscript" mimetype "shell" = Just "text/x-shellscript" mimetype "shellscript" = Just "text/x-shellscript" mimetype _ = Nothing xmlPhantom :: XmlName -> Pos -> XMLs -> XML xmlPhantom n bp = TreeN (Cell bp bp n) xmlPara :: Pos -> XMLs -> XML xmlPara = xmlPhantom "para" xmlTitle :: Pos -> XMLs -> XML xmlTitle = xmlPhantom "title" xmlName :: Pos -> XMLs -> XML xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t) xmlName bp ts = xmlPhantom "name" bp ts xmlDocument :: TCTs -> XMLs xmlDocument trees = case Seq.viewl trees of TreeN (unCell -> KeySection{}) vs :< ts -> case spanlTokens vs of (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') -> let vs'' = case Seq.findIndexL (\case TreeN (unCell -> KeyColon "about" _) _ -> True _ -> False) vs' of Just{} -> vs' Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs' in xmlTCTs inhXml { inhXml_titles = titles , inhXml_figure = True , inhXml_tree0 = List.repeat xmlPara } vs'' <> xmlTCTs inhXml ts _ -> xmlTCTs inhXml trees _ -> xmlTCTs inhXml trees xmlTCTs :: InhXml -> TCTs -> XMLs xmlTCTs inh_orig = go inh_orig where go :: InhXml -> TCTs -> XMLs go inh trees = case Seq.viewl trees of TreeN (Cell bp ep (KeyBar n _)) _ :< _ | (body,ts) <- spanlBar n trees , not (null body) -> (<| go inh ts) $ TreeN (Cell bp ep "artwork") $ maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $ body >>= xmlTCT inh{inhXml_tree0=[]} TreeN key@(unCell -> KeyColon n _) cs :< ts | (cs',ts') <- spanlKeyColon n ts , not (null cs') -> go inh $ TreeN key (cs<>cs') <| ts' TreeN (Cell bp ep KeyBrackets{}) _ :< _ | (rl,ts) <- spanlBrackets trees , not (null rl) -> (<| go inh ts) $ TreeN (Cell bp ep "rl") $ rl >>= xmlTCT inh_orig _ | (ul,ts) <- spanlItems (==KeyDash) trees , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul -> (<| go inh ts) $ TreeN (Cell bp ep "ul") $ ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara} _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol -> (<| go inh ts) $ TreeN (Cell bp ep "ol") $ ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara} t@(Tree0 toks) :< ts | isTokenElem toks -> xmlTCT inh_orig t <> go inh ts t@(Tree0 toks) :< ts -> case inhXml_tree0 inh of [] -> xmlTCT inh_orig t <> go inh{inhXml_tree0=[]} ts x:xs -> case Seq.viewl toks of EmptyL -> go inh{inhXml_tree0=xs} ts Cell bp _ep _ :< _ -> (<| go inh{inhXml_tree0=xs} ts) $ x bp $ xmlTCT inh_orig t t: xmlTCT inh_orig t <> go inh ts _ -> mempty xmlTCT :: InhXml -> TCT -> XMLs xmlTCT inh tr = case tr of TreeN (Cell bp ep KeySection{}) ts -> let (attrs,body) = partitionAttributesChildren ts in let inh' = inh { inhXml_tree0 = xmlTitle : List.repeat xmlPara , inhXml_figure = True } in Seq.singleton $ TreeN (Cell bp ep "section") $ xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <> xmlTCTs inh' body TreeN key@(Cell bp ep (KeyColon kn _)) ts -> let (attrs,body) = partitionAttributesChildren ts in let inh' = inh { inhXml_tree0 = case kn of "about" -> xmlTitle : xmlTitle : List.repeat xmlPara "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara "author" -> List.repeat xmlName _ -> [] } in case () of _ | kn == "about" -> xmlAbout inh' key attrs body _ | inhXml_figure inh && not (kn`List.elem`elems) -> Seq.singleton $ TreeN (Cell bp ep "figure") $ xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <> case toList body of [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body _ -> Seq.singleton $ xmlKey inh' key attrs body TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts Tree0 ts -> xmlTokens ts xmlAbout :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XMLs xmlAbout inh key attrs body = Seq.singleton $ xmlKey inh key attrs $ case Seq.viewl (inhXml_titles inh) of (Seq.viewl -> Cell bt _et _ :< _) :< _ -> ((<$> inhXml_titles inh) $ \title -> TreeN (Cell bt bt $ KeyColon "title" "") $ Seq.singleton $ Tree0 title) <> body _ -> body xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML xmlKey inh (Cell bp ep key) attrs ts = case key of KeyColon n _wh -> d_key n KeyGreat n _wh -> d_key n KeyEqual n _wh -> d_key n KeyBar n _wh -> d_key n KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com where com :: TL.Text com = Write.text Write.config_text $ TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> ts KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts KeyBrackets ident -> let inh' = inh{inhXml_figure = False} in TreeN (cell "reference") $ xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs) <> xmlTCTs inh' ts KeyDotSlash p -> TreeN (cell "include") $ xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <> xmlTCTs inh ts where cell :: a -> Cell a cell = Cell bp ep d_key :: Text -> XML d_key n = TreeN (cell $ xmlLocalName n) $ xmlAttrs attrs <> xmlTCTs inh ts xmlTokens :: Tokens -> XMLs xmlTokens tok = goTokens tok where go :: Cell Token -> XMLs go (Cell bp ep tk) = case tk of TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)] TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c TokenLink lnk -> Seq.singleton $ TreeN (cell "eref") $ xmlAttrs [cell ("to",lnk)] |> Tree0 (cell $ XmlText lnk) TokenPair PairBracket ts | to <- Write.textTokens ts , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to -> Seq.singleton $ TreeN (cell "rref") $ xmlAttrs [cell ("to",TL.toStrict to)] TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts TokenPair PairFrenchquote toks@ts -> Seq.singleton $ TreeN (cell "q") $ case ts of (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) -> case Seq.viewr ls of m :> Cell br er (TokenPlain r) -> goTokens $ Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l)) <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r))) _ -> goTokens $ Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls (Seq.viewr -> rs :> Cell br er (TokenPlain r)) -> goTokens $ rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r)) _ -> goTokens toks TokenPair PairHash to -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",TL.toStrict $ Write.textTokens to)] TokenPair (PairElem name attrs) ts -> Seq.singleton $ TreeN (cell $ xmlLocalName name) $ xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <> goTokens ts TokenPair p ts -> let (o,c) = pairBorders p ts in Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml` goTokens ts `unionXml` Seq.singleton (Tree0 $ Cell ep ep $ XmlText c) where cell :: a -> Cell a cell = Cell bp ep goTokens :: Tokens -> XMLs goTokens toks = case Seq.viewl toks of Cell bp _ep (TokenPair PairParen paren) :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket) :< ts) -> (<| goTokens ts) $ case bracket of (toList -> [Cell bl el (TokenLink lnk)]) -> TreeN (Cell bp eb "eref") $ xmlAttrs [Cell bl el ("to",lnk)] <> goTokens paren _ -> TreeN (Cell bp eb "rref") $ xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <> goTokens paren t :< ts -> go t `unionXml` goTokens ts Seq.EmptyL -> mempty -- | Unify two 'XMLs', merging border 'XmlText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) -> xs `unionXml` Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x spanlBar :: Name -> TCTs -> (TCTs, TCTs) spanlBar name = first unKeyBar . spanBar where unKeyBar :: TCTs -> TCTs unKeyBar = (=<<) $ \case TreeN (unCell -> KeyBar{}) ts -> ts _ -> mempty spanBar = Seq.spanl $ \case TreeN (unCell -> KeyBar n _) _ | n == name -> True _ -> False spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs) spanlItems liKey ts = let (lis, ts') = spanLIs ts in foldl' accumLIs (mempty,ts') lis where spanLIs = Seq.spanl $ \case TreeN (unCell -> liKey -> True) _ -> True Tree0 toks -> (`any` toks) $ \case (unCell -> TokenPair (PairElem "li" _) _) -> True _ -> False {- case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of [unCell -> TokenPair (PairElem "li" _) _] -> True _ -> False -} _ -> False accumLIs acc@(oks,kos) t = case t of TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos) Tree0 toks -> let (ok,ko) = (`Seq.spanl` toks) $ \tok -> case unCell tok of TokenPair (PairElem "li" _) _ -> True TokenPlain txt -> Char.isSpace`Text.all`txt _ -> False in ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok) , if null ko then kos else Tree0 ko<|kos ) _ -> acc rmTokenPlain = Seq.filter $ \case (unCell -> TokenPlain{}) -> False _ -> True spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs) spanlKeyColon name = Seq.spanl $ \case TreeN (unCell -> KeyBar n _) _ -> n == name TreeN (unCell -> KeyGreat n _) _ -> n == name _ -> False spanlBrackets :: TCTs -> (TCTs, TCTs) spanlBrackets = Seq.spanl $ \case TreeN (unCell -> KeyBrackets{}) _ -> True _ -> False spanlTokens :: TCTs -> (Seq Tokens, TCTs) spanlTokens = first ((\case Tree0 ts -> ts _ -> undefined) <$>) . Seq.spanl (\case Tree0{} -> True _ -> False) getAttrId :: TCTs -> Text getAttrId ts = case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of Just (Tree0 toks) -> TL.toStrict $ Write.textTokens toks _ -> "" setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text)) setXmlAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text)) defXmlAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>) {- xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc -- TODO: conflict -} {- d_Attributes :: XmlAttrs -> DTC -> DTC d_Attributes = flip $ Map.foldrWithKey $ \n v -> B.AddCustomAttribute (B.Text n) (B.Text v) -} partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs) partitionAttributesChildren ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case TreeN (unCell -> KeyEqual{}) _cs -> True _ -> False attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as attr = \case TreeN (Cell bp ep (KeyEqual n _wh)) a -> Cell bp ep (xmlLocalName n, v) where v = TL.toStrict $ Write.text Write.config_text{Write.config_text_escape = False} $ TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a _ -> undefined elems :: Set Text elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "audio" , "author" , "authors" , "bcp14" , "br" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "region" , "rl" , "rref" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tbody" , "td" , "tel" , "tfoot" , "title" , "th" , "thead" , "toc" , "tof" , "tr" , "tt" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ]