]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add References, --trace and other stuffs.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Eq (Eq(..))
13 import Data.Foldable (null, foldl', any)
14 import Data.Function (($), (.), id)
15 import Data.Functor (Functor(..), (<$>))
16 import Data.Maybe (Maybe(..), maybe)
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
20 import Data.Set (Set)
21 import Data.Text (Text)
22 import Data.TreeSeq.Strict (Tree(..))
23 import GHC.Exts (toList)
24 import Prelude (undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text as Text
29 import qualified Data.Text.Lazy as TL
30 import qualified Language.TCT.Write.Text as Write
31 import qualified System.FilePath as FP
32
33 import Text.Blaze.XML ()
34 import Language.TCT hiding (Parser)
35 import Language.XML
36 import qualified Data.TreeSeq.Strict as TreeSeq
37
38 -- * Type 'InhXml'
39 data InhXml
40 = InhXml
41 { inhXml_figure :: Bool
42 , inhXml_tree0 :: [Pos -> XMLs -> XML]
43 , inhXml_titles :: Seq Tokens
44 }
45 inhXml :: InhXml
46 inhXml = InhXml
47 { inhXml_figure = False
48 , inhXml_tree0 = []
49 , inhXml_titles = mempty
50 }
51
52 mimetype :: Text -> Maybe Text
53 mimetype "hs" = Just "text/x-haskell"
54 mimetype "sh" = Just "text/x-shellscript"
55 mimetype "shell" = Just "text/x-shellscript"
56 mimetype "shellscript" = Just "text/x-shellscript"
57 mimetype _ = Nothing
58
59 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
60 xmlPhantom n bp = TreeN (Cell bp bp n)
61 xmlPara :: Pos -> XMLs -> XML
62 xmlPara = xmlPhantom "para"
63 xmlTitle :: Pos -> XMLs -> XML
64 xmlTitle = xmlPhantom "title"
65 xmlName :: Pos -> XMLs -> XML
66 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
67 xmlName bp ts = xmlPhantom "name" bp ts
68
69 xmlDocument :: TCTs -> XMLs
70 xmlDocument trees =
71 case Seq.viewl trees of
72 TreeN (unCell -> KeySection{}) vs :< ts ->
73 case spanlTokens vs of
74 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
75 let vs'' =
76 case Seq.findIndexL
77 (\case
78 TreeN (unCell -> KeyColon "about" _) _ -> True
79 _ -> False) vs' of
80 Just{} -> vs'
81 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
82 in
83 xmlTCTs inhXml
84 { inhXml_titles = titles
85 , inhXml_figure = True
86 , inhXml_tree0 = List.repeat xmlPara
87 } vs'' <>
88 xmlTCTs inhXml ts
89 _ -> xmlTCTs inhXml trees
90 _ -> xmlTCTs inhXml trees
91
92 xmlTCTs :: InhXml -> TCTs -> XMLs
93 xmlTCTs inh_orig = go inh_orig
94 where
95 go :: InhXml -> TCTs -> XMLs
96 go inh trees =
97 case Seq.viewl trees of
98 TreeN (Cell bp ep (KeyBar n _)) _ :< _
99 | (body,ts) <- spanlBar n trees
100 , not (null body) ->
101 (<| go inh ts) $
102 TreeN (Cell bp ep "artwork") $
103 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
104 body >>= xmlTCT inh{inhXml_tree0=[]}
105
106 TreeN key@(unCell -> KeyColon n _) cs :< ts
107 | (cs',ts') <- spanlKeyColon n ts
108 , not (null cs') ->
109 go inh $ TreeN key (cs<>cs') <| ts'
110
111 TreeN (Cell bp ep KeyBrackets{}) _ :< _
112 | (rl,ts) <- spanlBrackets trees
113 , not (null rl) ->
114 (<| go inh ts) $
115 TreeN (Cell bp ep "references") $
116 rl >>= xmlTCT inh_orig
117
118 _ | (ul,ts) <- spanlItems (==KeyDash) trees
119 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
120 (<| go inh ts) $
121 TreeN (Cell bp ep "ul") $
122 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
123
124 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
125 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
126 (<| go inh ts) $
127 TreeN (Cell bp ep "ol") $
128 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
129
130 t@(Tree0 toks) :< ts | isTokenElem toks ->
131 xmlTCT inh_orig t <>
132 go inh ts
133
134 t@(Tree0 toks) :< ts ->
135 case inhXml_tree0 inh of
136 [] ->
137 xmlTCT inh_orig t <>
138 go inh{inhXml_tree0=[]} ts
139 x:xs ->
140 case Seq.viewl toks of
141 EmptyL -> go inh{inhXml_tree0=xs} ts
142 Cell bp _ep _ :< _ ->
143 (<| go inh{inhXml_tree0=xs} ts) $
144 x bp $
145 xmlTCT inh_orig t
146
147 t:<ts ->
148 xmlTCT inh_orig t <>
149 go inh ts
150
151 _ -> mempty
152
153 xmlTCT :: InhXml -> TCT -> XMLs
154 xmlTCT inh tr =
155 case tr of
156 TreeN (Cell bp ep KeySection{}) ts ->
157 let (attrs,body) = partitionAttributesChildren ts in
158 let inh' = inh
159 { inhXml_tree0 = xmlTitle : List.repeat xmlPara
160 , inhXml_figure = True
161 } in
162 Seq.singleton $
163 TreeN (Cell bp ep "section") $
164 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
165 xmlTCTs inh' body
166
167 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
168 let (attrs,body) = partitionAttributesChildren ts in
169 let inh' = inh { inhXml_tree0 =
170 case kn of
171 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
172 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
173 "author" -> List.repeat xmlName
174 "editor" -> List.repeat xmlName
175 _ -> []
176 } in
177 case () of
178 _ | kn == "about" -> xmlAbout inh' key attrs body
179
180 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
181 Seq.singleton $
182 TreeN (Cell bp ep "figure") $
183 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
184 case toList body of
185 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
186 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
187
188 _ -> Seq.singleton $ xmlKey inh' key attrs body
189
190 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
191
192 Tree0 ts -> xmlTokens ts
193
194 xmlAbout ::
195 InhXml ->
196 Cell Key -> Seq (Cell (XmlName, Text)) ->
197 TCTs -> XMLs
198 xmlAbout inh key attrs body =
199 Seq.singleton $
200 xmlKey inh key attrs $
201 case Seq.viewl (inhXml_titles inh) of
202 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
203 ((<$> inhXml_titles inh) $ \title ->
204 TreeN (Cell bt bt $ KeyColon "title" "") $
205 Seq.singleton $ Tree0 title)
206 <> body
207 _ -> body
208
209 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
210 xmlKey inh (Cell bp ep key) attrs ts =
211 case key of
212 KeyColon n _wh -> d_key n
213 KeyGreat n _wh -> d_key n
214 KeyEqual n _wh -> d_key n
215 KeyBar n _wh -> d_key n
216 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
217 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
218 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
219 where
220 com :: TL.Text
221 com =
222 Write.text Write.config_text $
223 TreeSeq.mapAlsoNode
224 (cell1 . unCell)
225 (\_path -> fmap $ cell1 . unCell) <$> ts
226 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
227 KeyBrackets ident ->
228 let inh' = inh{inhXml_figure = False} in
229 let (attrs',body) = partitionAttributesChildren ts in
230 TreeN (cell "reference") $
231 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) (attrs<>attrs')) <>
232 xmlTCTs inh' body
233 KeyDotSlash p ->
234 TreeN (cell "include") $
235 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
236 xmlTCTs inh ts
237 where
238 cell :: a -> Cell a
239 cell = Cell bp ep
240 d_key :: Text -> XML
241 d_key n =
242 TreeN (cell $ xmlLocalName n) $
243 xmlAttrs attrs <>
244 xmlTCTs inh ts
245
246 xmlTokens :: Tokens -> XMLs
247 xmlTokens tok = goTokens tok
248 where
249 go :: Cell Token -> XMLs
250 go (Cell bp ep tk) =
251 case tk of
252 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
253 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
254 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
255 TokenLink lnk -> Seq.singleton $
256 TreeN (cell "eref") $
257 xmlAttrs [cell ("to",lnk)]
258 TokenPair PairBracket ts | to <- Write.textTokens ts
259 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
260 Seq.singleton $
261 TreeN (cell "rref") $
262 xmlAttrs [cell ("to",TL.toStrict to)]
263 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
264 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
265 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
266 TokenPair PairFrenchquote toks@ts ->
267 Seq.singleton $
268 TreeN (cell "q") $
269 case ts of
270 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
271 case Seq.viewr ls of
272 m :> Cell br er (TokenPlain r) ->
273 goTokens $
274 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
275 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
276 _ ->
277 goTokens $
278 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
279 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
280 goTokens $
281 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
282 _ -> goTokens toks
283 TokenPair PairHash to ->
284 Seq.singleton $
285 TreeN (cell "ref") $
286 xmlAttrs [cell ("to",TL.toStrict $ Write.textTokens to)]
287 TokenPair (PairElem name attrs) ts ->
288 Seq.singleton $
289 TreeN (cell $ xmlLocalName name) $
290 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
291 goTokens ts
292 TokenPair p ts ->
293 let (o,c) = pairBorders p ts in
294 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
295 goTokens ts `unionXml`
296 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
297 where
298 cell :: a -> Cell a
299 cell = Cell bp ep
300
301 goTokens :: Tokens -> XMLs
302 goTokens toks =
303 case Seq.viewl toks of
304 Cell bp _ep (TokenPair PairParen paren)
305 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
306 :< ts) ->
307 (<| goTokens ts) $
308 case bracket of
309 (toList -> [Cell bl el (TokenLink lnk)]) ->
310 TreeN (Cell bp eb "eref") $
311 xmlAttrs [Cell bl el ("to",lnk)] <>
312 goTokens paren
313 _ ->
314 TreeN (Cell bp eb "rref") $
315 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <>
316 goTokens paren
317 t :< ts -> go t `unionXml` goTokens ts
318 Seq.EmptyL -> mempty
319
320 -- | Unify two 'XMLs', merging border 'XmlText's if any.
321 unionXml :: XMLs -> XMLs -> XMLs
322 unionXml x y =
323 case (Seq.viewr x, Seq.viewl y) of
324 (xs :> x0, y0 :< ys) ->
325 case (x0,y0) of
326 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
327 xs `unionXml`
328 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
329 ys
330 _ -> x <> y
331 (Seq.EmptyR, _) -> y
332 (_, Seq.EmptyL) -> x
333
334
335 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
336 spanlBar name = first unKeyBar . spanBar
337 where
338 unKeyBar :: TCTs -> TCTs
339 unKeyBar = (=<<) $ \case
340 TreeN (unCell -> KeyBar{}) ts -> ts
341 _ -> mempty
342 spanBar =
343 Seq.spanl $ \case
344 TreeN (unCell -> KeyBar n _) _ | n == name -> True
345 _ -> False
346
347 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
348 spanlItems liKey ts =
349 let (lis, ts') = spanLIs ts in
350 foldl' accumLIs (mempty,ts') lis
351 where
352 spanLIs = Seq.spanl $ \case
353 TreeN (unCell -> liKey -> True) _ -> True
354 Tree0 toks ->
355 (`any` toks) $ \case
356 (unCell -> TokenPair (PairElem "li" _) _) -> True
357 _ -> False
358 {-
359 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
360 [unCell -> TokenPair (PairElem "li" _) _] -> True
361 _ -> False
362 -}
363 _ -> False
364 accumLIs acc@(oks,kos) t =
365 case t of
366 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
367 Tree0 toks ->
368 let (ok,ko) =
369 (`Seq.spanl` toks) $ \tok ->
370 case unCell tok of
371 TokenPair (PairElem "li" _) _ -> True
372 TokenPlain txt -> Char.isSpace`Text.all`txt
373 _ -> False in
374 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
375 , if null ko then kos else Tree0 ko<|kos )
376 _ -> acc
377 rmTokenPlain =
378 Seq.filter $ \case
379 (unCell -> TokenPlain{}) -> False
380 _ -> True
381
382 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
383 spanlKeyColon name =
384 Seq.spanl $ \case
385 TreeN (unCell -> KeyBar n _) _ -> n == name
386 TreeN (unCell -> KeyGreat n _) _ -> n == name
387 _ -> False
388
389 spanlBrackets :: TCTs -> (TCTs, TCTs)
390 spanlBrackets =
391 Seq.spanl $ \case
392 TreeN (unCell -> KeyBrackets{}) _ -> True
393 _ -> False
394
395 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
396 spanlTokens =
397 first ((\case
398 Tree0 ts -> ts
399 _ -> undefined) <$>) .
400 Seq.spanl (\case
401 Tree0{} -> True
402 _ -> False)
403
404 getAttrId :: TCTs -> Text
405 getAttrId ts =
406 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
407 Just (Tree0 toks) -> TL.toStrict $ Write.textTokens toks
408 _ -> ""
409
410 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
411 setXmlAttr a@(unCell -> (k, _v)) as =
412 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
413 Just idx -> Seq.update idx a as
414 Nothing -> a <| as
415
416 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
417 defXmlAttr a@(unCell -> (k, _v)) as =
418 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
419 Just _idx -> as
420 Nothing -> a <| as
421
422 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
423 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
424
425 {-
426 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
427 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
428 -- TODO: conflict
429 -}
430
431 {-
432 d_Attributes :: XmlAttrs -> DTC -> DTC
433 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
434 B.AddCustomAttribute (B.Text n) (B.Text v)
435 -}
436
437 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
438 partitionAttributesChildren ts = (attrs,cs)
439 where
440 (as,cs) = (`Seq.partition` ts) $ \case
441 TreeN (unCell -> KeyEqual{}) _cs -> True
442 _ -> False
443 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
444 attr = \case
445 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
446 Cell bp ep (xmlLocalName n, v)
447 where
448 v = TL.toStrict $
449 Write.text Write.config_text{Write.config_text_escape = False} $
450 TreeSeq.mapAlsoNode (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
451 _ -> undefined
452
453 elems :: Set Text
454 elems =
455 [ "about"
456 , "abstract"
457 , "address"
458 , "alias"
459 , "annotation"
460 , "area"
461 , "artwork"
462 , "aside"
463 , "audio"
464 , "author"
465 , "authors"
466 , "bcp14"
467 , "br"
468 , "call"
469 , "city"
470 , "code"
471 , "comment"
472 , "comments"
473 , "country"
474 , "date"
475 , "dd"
476 , "define"
477 , "del"
478 , "div"
479 , "dl"
480 , "document"
481 , "dt"
482 , "editor"
483 , "email"
484 , "embed"
485 , "eref"
486 , "fax"
487 , "feed"
488 , "feedback"
489 , "figure"
490 , "filter"
491 , "format"
492 , "from"
493 , "h"
494 , "hi"
495 , "html5"
496 , "i"
497 , "index"
498 , "iref"
499 , "keyword"
500 , "li"
501 , "link"
502 , "name"
503 , "note"
504 , "ol"
505 , "organization"
506 , "para"
507 , "postamble"
508 , "preamble"
509 , "q"
510 , "ref"
511 , "reference"
512 , "references"
513 , "region"
514 , "rref"
515 , "sc"
516 , "section"
517 , "serie"
518 , "source"
519 , "span"
520 , "street"
521 , "style"
522 , "sub"
523 , "sup"
524 , "table"
525 , "tbody"
526 , "td"
527 , "tel"
528 , "tfoot"
529 , "title"
530 , "th"
531 , "thead"
532 , "toc"
533 , "tof"
534 , "tr"
535 , "tt"
536 , "u"
537 , "ul"
538 , "uri"
539 , "version"
540 , "video"
541 , "workgroup"
542 , "xml"
543 , "zipcode"
544 ]