{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Language.TCT.Write.XML where import Control.Arrow (first) import Control.Monad (Monad(..), (=<<)) import Data.Bool import Data.Default.Class (Default(..)) import Data.Eq (Eq(..)) import Data.Foldable (null, foldl', any) import Data.Function (($), (.), id) import Data.Functor (Functor(..), (<$>)) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>)) import Data.Set (Set) import Data.Text (Text) import Data.TreeSeq.Strict (Tree(..)) import GHC.Exts (toList) import Prelude (undefined) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Sequence as Seq import qualified Data.Text as Text import qualified Data.Text.Lazy as TL import qualified Control.Monad.Trans.State as S import qualified Language.TCT.Write.Plain as Plain import qualified System.FilePath as FP import Text.Blaze.XML () import Language.TCT hiding (Parser) import Language.XML import qualified Data.TreeSeq.Strict as TreeSeq import Debug.Trace (trace) import Text.Show (show) xmlDocument :: Roots -> XMLs xmlDocument trees = -- (`S.evalState` def) $ case Seq.viewl trees of Tree (unCell -> NodeHeader HeaderSection{}) vs :< ts -> case spanlTokens vs of (titles@(Seq.viewl -> (Seq.viewl -> (posTree -> bp) :< _) :< _), vs') -> let vs'' = case Seq.findIndexL (\case Tree (unCell -> NodeHeader (HeaderColon "about" _)) _ -> True _ -> False) vs' of Nothing -> Tree (Cell bp bp $ NodeHeader $ HeaderColon "about" "") mempty <| vs' Just{} -> vs' in xmlify def { inh_titles = titles , inh_figure = True , inh_tree0 = List.repeat xmlPara } vs'' <> xmlify def ts _ -> xmlify def trees _ -> xmlify def trees {- -- * Type 'Xmls' type Xmls = S.State State XMLs type Xml = S.State State XML instance Semigroup Xmls where (<>) = liftA2 (<>) instance Monoid Xmls where mempty = return mempty mappend = (<>) -- * Type 'State' data State = State { state_pos :: Pos } instance Default State where def = State { state_pos = pos1 } -} -- * Type 'Inh' data Inh = Inh { inh_figure :: Bool , inh_tree0 :: [Pos -> XMLs -> XML] , inh_titles :: Seq Tokens } instance Default Inh where def = Inh { inh_figure = False , inh_tree0 = [] , inh_titles = mempty } -- * Class 'Xmlify' class Xmlify a where xmlify :: Inh -> a -> XMLs instance Xmlify Roots where xmlify inh_orig = go inh_orig where go :: Inh -> Roots -> XMLs go inh trees = case Seq.viewl trees of Tree (Cell bp ep (NodeHeader (HeaderBar n _))) _ :< _ | (body,ts) <- spanlBar n trees , not (null body) -> (<| go inh ts) $ Tree (Cell bp ep "artwork") $ maybe id (\v -> (Tree0 (XmlAttr (Cell bp ep ("type", v))) <|)) (mimetype n) $ body >>= xmlify inh{inh_tree0=[]} Tree nod@(unCell -> NodeHeader (HeaderColon n _)) cs :< ts | (cs',ts') <- spanlHeaderColon n ts , not (null cs') -> go inh $ Tree nod (cs<>cs') <| ts' Tree (Cell bp ep (NodeHeader HeaderBrackets{})) _ :< _ | (rl,ts) <- spanlBrackets trees , not (null rl) -> (<| go inh ts) $ Tree (Cell bp ep "references") $ rl >>= xmlify inh_orig _ | (ul,ts) <- spanlItems (==HeaderDash) trees , Tree (Cell bp ep _) _ :< _ <- Seq.viewl ul -> (<| go inh ts) $ Tree (Cell bp ep "ul") $ ul >>= xmlify inh{inh_tree0=List.repeat xmlPara} _ | (ol,ts) <- spanlItems (\case HeaderDot{} -> True; _ -> False) trees , Tree (Cell bp ep _) _ :< _ <- Seq.viewl ol -> (<| go inh ts) $ Tree (Cell bp ep "ol") $ ol >>= xmlify inh{inh_tree0=List.repeat xmlPara} t@(Tree0 toks) :< ts -> case inh_tree0 inh of [] -> xmlify inh_orig t <> go inh ts _ | isTokenElem toks -> xmlify inh_orig t <> go inh ts tree0:inh_tree0 -> (case Seq.viewl toks of EmptyL -> id (posTree -> bp) :< _ -> (tree0 bp (xmlify inh_orig t) <|)) $ go inh{inh_tree0} ts t: xmlify inh_orig t <> go inh ts _ -> mempty instance Xmlify Root where xmlify inh (Tree (Cell bp ep nod) ts) = case nod of NodeHeader hdr -> case hdr of HeaderSection{} -> let (attrs,body) = partitionAttributesChildren ts in let inh' = inh { inh_tree0 = xmlTitle : List.repeat xmlPara , inh_figure = True } in Seq.singleton $ Tree (Cell bp ep "section") $ xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <> xmlify inh' body HeaderColon kn _wh -> let (attrs,body) = partitionAttributesChildren ts in let inh' = inh { inh_tree0 = case kn of "about" -> xmlTitle : xmlTitle : List.repeat xmlPara "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara "serie" -> List.repeat xmlName "author" -> List.repeat xmlName "editor" -> List.repeat xmlName "org" -> List.repeat xmlName _ -> [] } in case () of _ | kn == "about" -> xmlAbout inh' nod attrs body _ | inh_figure inh && not (kn`List.elem`elems) -> Seq.singleton $ Tree (Cell bp ep "figure") $ xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <> case toList body of [Tree0{}] -> xmlify inh'{inh_tree0 = List.repeat xmlPara} body _ -> xmlify inh'{inh_tree0 = xmlTitle : List.repeat xmlPara} body _ -> Seq.singleton $ x_Header inh' n HeaderGreat n _wh -> x_Header inh' n HeaderEqual n _wh -> x_Header inh' n HeaderBar n _wh -> x_Header inh' n HeaderDot _n -> Tree (cell "li") $ xmlify inh ts HeaderDash -> Tree (cell "li") $ xmlify inh ts HeaderDashDash -> Tree0 $ XmlComment $ cell $ -- debug1_ ("TS", ts) $ -- debug1_ ("RS", (S.evalState (Plain.rackUpLeft ts) Nothing)) $ Plain.text def $ S.evalState (Plain.rackUpLeft ts) Nothing {- TreeSeq.mapAlsoNode (cell1 . unCell) (\_k -> fmap $ TreeSeq.mapAlsoNode (cell1 . unCell) (\_k' -> cell1 . unCell)) <$> ts -} HeaderLower n as -> Tree (cell "artwork") $ xmlify inh ts HeaderBrackets ident -> let inh' = inh{inh_figure = False} in let (attrs',body) = partitionAttributesChildren ts in Tree (cell "reference") $ xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <> xmlify inh'{inh_tree0 = xmlTitle : xmlTitle : List.repeat xmlPara} body HeaderDotSlash p -> Tree (cell "include") $ xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <> xmlify inh ts NodePair pair -> case pair of PairBracket | to <- Plain.text def ts , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to -> Seq.singleton . Tree (cell "rref") $ xmlAttrs [cell ("to",TL.toStrict to)] PairStar -> Seq.singleton . Tree (cell "b") $ xmlify inh ts PairSlash -> Seq.singleton . Tree (cell "i") $ xmlify inh ts PairBackquote -> Seq.singleton . Tree (cell "code") $ xmlify inh ts PairFrenchquote -> Seq.singleton . Tree (cell "q") $ xmlify inh ts {- case ts of (Seq.viewl -> Tree0 (Cell bl el (TokenPlain l)) :< ls) -> case Seq.viewr ls of m :> Tree0 (Cell br er (TokenPlain r)) -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))) <|(m|>Tree0 (Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))) _ -> xmlify inh $ Tree0 (Cell bl el (TokenPlain (Text.dropAround Char.isSpace l))) <| ls (Seq.viewr -> rs :> Tree0 (Cell br er (TokenPlain r))) -> xmlify inh $ rs |> Tree0 (Cell br er (TokenPlain (Text.dropAround Char.isSpace r))) _ -> xmlify inh ts -} PairHash -> Seq.singleton . Tree (cell "ref") $ xmlAttrs [cell ("to",TL.toStrict $ Plain.text def ts)] PairElem name attrs -> Seq.singleton . Tree (cell $ xmlLocalName name) $ xmlAttrs (Seq.fromList $ (\(_wh,ElemAttr{..}) -> cell (xmlLocalName elemAttr_name,elemAttr_value)) <$> attrs) <> xmlify inh ts _ -> let (o,c) = pairBorders p ts in Seq.singleton (Tree0 $ XmlPhrases $ phrasify $ Cell bp bp o) `unionXml` xmlify inh ts `unionXml` Seq.singleton (Tree0 $ XmlPhrases $ phrasify $ Cell ep ep c) NodeToken tok -> case tok of TokenEscape c -> Seq.singleton $ Tree0 $ XmlPhrases $ phrasify c TokenText t -> Seq.singleton $ Tree0 $ XmlText t TokenTag t -> Seq.singleton $ Tree (cell "ref") $ xmlAttrs [cell ("to",t)] TokenLink lnk -> Seq.singleton $ Tree (cell "eref") $ xmlAttrs [cell ("to",lnk)] where cell :: a -> Cell a cell = Cell bp ep x_Header :: Inh -> Text -> XML x_Header inh' n = Tree (cell $ xmlLocalName n) $ xmlAttrs attrs <> xmlify inh' ts instance Xmlify Tokens where xmlify inh toks = case Seq.viewl toks of Tree (Cell bp _ep (NodePair PairParen)) paren :< (Seq.viewl -> Tree (Cell bb eb (NodePair PairBracket)) bracket :< ts) -> (<| xmlify inh ts) $ case bracket of (toList -> [Tree0 (Cell bl el (TokenLink lnk))]) -> Tree (Cell bp eb "eref") $ xmlAttrs [Cell bl el ("to",lnk)] <> xmlify inh paren _ -> Tree (Cell bp eb "rref") $ xmlAttrs [Cell bb eb ("to",TL.toStrict $ Plain.text def bracket)] <> xmlify inh paren t :< ts -> xmlify inh t `unionXml` xmlify inh ts Seq.EmptyL -> mempty {- instance Xmlify Token where xmlify inh (Tree (Cell bp ep (NodePair p)) ts) = xmlify inh (Tree0 tok) = do where cell :: a -> Cell a cell = Cell bp ep {- whites :: Pos -> Pos -> Seq XmlText whites (Pos bLine bCol) (Pos eLine eCol) = case bLine`compate`eLine of LT -> verts <> EQ -> horiz bCol eCol GT -> -} instance Xmlify (Cell Phrase) where xmlify _inh t = Seq.singleton $ Tree0 $ XmlPhrases $ Seq.singleton t -} mimetype :: Text -> Maybe Text mimetype "hs" = Just "text/x-haskell" mimetype "sh" = Just "text/x-shellscript" mimetype "shell" = Just "text/x-shellscript" mimetype "shellscript" = Just "text/x-shellscript" mimetype _ = Nothing xmlPhantom :: XmlName -> Pos -> XMLs -> XML xmlPhantom n bp = Tree (Cell bp bp n) xmlPara :: Pos -> XMLs -> XML xmlPara = xmlPhantom "para" xmlTitle :: Pos -> XMLs -> XML xmlTitle = xmlPhantom "title" xmlName :: Pos -> XMLs -> XML xmlName bp (toList -> [Tree0 (XmlText (unCell -> t))]) = Tree0 (XmlAttr $ Cell bp bp ("name", t)) xmlName bp ts = xmlPhantom "name" bp ts xmlAbout :: Inh -> Cell Header -> Seq (Cell (XmlName, Text)) -> Roots -> XMLs xmlAbout inh hdr attrs body = Seq.singleton $ xmlHeader inh hdr attrs $ case Seq.viewl (inh_titles inh) of (Seq.viewl -> (posTree -> bt) :< _) :< _ -> ((<$> inh_titles inh) $ \title -> Tree (Cell bt bt $ NodeHeader $ HeaderColon "title" "") $ Seq.singleton $ Tree0 title) <> body _ -> body xmlHeader :: Inh -> Cell Header -> Seq (Cell (XmlName, Text)) -> Roots -> XML xmlHeader inh (Cell bp ep hdr) attrs ts = xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs xmlAttrs = (Tree0 . XmlAttr <$>) -- | Unify two 'XMLs', merging border 'XmlText's if any. unionXml :: XMLs -> XMLs -> XMLs unionXml x y = case (Seq.viewr x, Seq.viewl y) of (xs :> x0, y0 :< ys) -> case (x0,y0) of ( Tree0 (XmlPhrases tx) , Tree0 (XmlPhrases ty) ) -> xs `unionXml` Seq.singleton (Tree0 $ XmlPhrases $ tx <> ty) `unionXml` ys ( Tree0 (XmlText tx) , Tree0 (XmlText ty) ) -> xs `unionXml` Seq.singleton (Tree0 $ XmlText $ tx <> ty) `unionXml` ys _ -> x <> y (Seq.EmptyR, _) -> y (_, Seq.EmptyL) -> x spanlBar :: Name -> Roots -> (Roots, Roots) spanlBar name = first unHeaderBar . spanBar where unHeaderBar :: Roots -> Roots unHeaderBar = (=<<) $ \case Tree (unCell -> NodeHeader HeaderBar{}) ts -> ts _ -> mempty spanBar = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar n _)) _ | n == name -> True _ -> False spanlItems :: (Header -> Bool) -> Roots -> (Roots, Roots) spanlItems liHeader ts = let (lis, ts') = spanLIs ts in foldl' accumLIs (mempty,ts') lis where spanLIs :: Roots -> (Roots, Roots) spanLIs = Seq.spanl $ \case Tree (unCell -> NodeHeader (liHeader -> True)) _ -> True Tree (NodeToken toks) _ -> (`any` toks) $ \case TreeN (unCell -> NodePair (PairElem "li" _)) _ -> True _ -> False {- case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of [unCell -> TokenPair (PairElem "li" _) _] -> True _ -> False -} _ -> False accumLIs :: (Roots,Roots) -> Root -> (Roots,Roots) accumLIs acc@(oks,kos) t = case t of Tree (unCell -> NodeHeader (liHeader -> True)) _ -> (oks|>t,kos) Tree0 toks -> let (ok,ko) = (`Seq.spanl` toks) $ \case Tree (unCell -> NodePair (PairElem "li" _)) _ -> True -- (isTokenWhite -> True) -> True -- TODO: see if this is still useful _ -> False in ( if null ok then oks else oks|>Tree0 (rmTokenWhite ok) , if null ko then kos else Tree0 ko<|kos ) _ -> acc {- rmTokenWhite :: Tokens -> Tokens rmTokenWhite = Seq.filter $ \case (isTokenWhite -> False) -> True _ -> True -} spanlHeaderColon :: Name -> Roots -> (Roots, Roots) spanlHeaderColon name = Seq.spanl $ \case Tree (unCell -> NodeHeader (HeaderBar n _)) _ -> n == name Tree (unCell -> NodeHeader (HeaderGreat n _)) _ -> n == name _ -> False spanlBrackets :: Roots -> (Roots, Roots) spanlBrackets = Seq.spanl $ \case Tree (unCell -> NodeHeader HeaderBrackets{}) _ -> True _ -> False spanlTokens :: Roots -> (Seq Tokens, Roots) spanlTokens = first ((\case Tree0 ts -> ts _ -> undefined) <$>) . Seq.spanl (\case Tree0{} -> True _ -> False) getAttrId :: Roots -> Text getAttrId ts = case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of Just (Tree0 toks) -> TL.toStrict $ Plain.text def toks _ -> "" setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text)) setXmlAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just idx -> Seq.update idx a as Nothing -> a <| as defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text)) defXmlAttr a@(unCell -> (k, _v)) as = case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of Just _idx -> as Nothing -> a <| as partitionAttributesChildren :: Roots -> (Seq (Cell (XmlName, Text)), Roots) partitionAttributesChildren ts = (attrs,cs) where (as,cs) = (`Seq.partition` ts) $ \case Tree (unCell -> NodeHeader HeaderEqual{}) _cs -> True _ -> False attrs = attr <$> as attr = \case Tree (Cell bp ep (NodeHeader (HeaderEqual n _wh))) a -> Cell bp ep (xmlLocalName n, v) where v = TL.toStrict $ Plain.text def{Plain.state_escape = False} $ TreeSeq.mapAlsoNode (cell1 . unCell) (\_k -> fmap $ TreeSeq.mapAlsoNode (cell1 . unCell) (\_k' -> cell1 . unCell)) <$> a _ -> undefined elems :: Set Text elems = [ "about" , "abstract" , "address" , "alias" , "annotation" , "area" , "artwork" , "aside" , "audio" , "author" , "authors" , "bcp14" , "br" , "call" , "city" , "code" , "comment" , "comments" , "country" , "date" , "dd" , "define" , "del" , "div" , "dl" , "document" , "dt" , "editor" , "email" , "embed" , "eref" , "fax" , "feed" , "feedback" , "figure" , "filter" , "format" , "from" , "h" , "hi" , "html5" , "i" , "index" , "iref" , "keyword" , "li" , "link" , "name" , "note" , "ol" , "organization" , "para" , "postamble" , "preamble" , "q" , "ref" , "reference" , "references" , "region" , "rref" , "sc" , "section" , "serie" , "source" , "span" , "street" , "style" , "sub" , "sup" , "table" , "tbody" , "td" , "tel" , "tfoot" , "title" , "th" , "thead" , "toc" , "tof" , "tr" , "tt" , "u" , "ul" , "uri" , "version" , "video" , "workgroup" , "xml" , "zipcode" ]