]> Git — Sourcephile - doclang.git/blob - Language/TCT/Write/XML.hs
Add DTC.Index.
[doclang.git] / Language / TCT / Write / XML.hs
1 {-# LANGUAGE FlexibleContexts #-}
2 {-# LANGUAGE FlexibleInstances #-}
3 {-# LANGUAGE OverloadedLists #-}
4 {-# LANGUAGE OverloadedStrings #-}
5 {-# LANGUAGE ViewPatterns #-}
6 {-# OPTIONS_GHC -fno-warn-orphans #-}
7 module Language.TCT.Write.XML where
8
9 import Control.Arrow (first)
10 import Control.Monad (Monad(..), (=<<))
11 import Data.Bool
12 import Data.Eq (Eq(..))
13 import Data.Foldable (null, foldl', any)
14 import Data.Function (($), (.), id)
15 import Data.Functor (Functor(..), (<$>))
16 import Data.Maybe (Maybe(..), maybe)
17 import Data.Monoid (Monoid(..))
18 import Data.Semigroup (Semigroup(..))
19 import Data.Sequence (Seq, ViewL(..), ViewR(..), (<|), (|>))
20 import Data.Set (Set)
21 import Data.Text (Text)
22 import Data.TreeSeq.Strict (Tree(..))
23 import GHC.Exts (toList)
24 import Prelude (undefined)
25 import qualified Data.Char as Char
26 import qualified Data.List as List
27 import qualified Data.Sequence as Seq
28 import qualified Data.Text as Text
29 import qualified Data.Text.Lazy as TL
30 import qualified Language.TCT.Write.Text as Write
31 import qualified System.FilePath as FP
32
33 import Text.Blaze.XML ()
34 import Language.TCT hiding (Parser)
35 import Language.XML
36 import qualified Data.TreeSeq.Strict as TreeSeq
37
38 -- * Type 'InhXml'
39 data InhXml
40 = InhXml
41 { inhXml_figure :: Bool
42 , inhXml_tree0 :: [Pos -> XMLs -> XML]
43 , inhXml_titles :: Seq Tokens
44 }
45 inhXml :: InhXml
46 inhXml = InhXml
47 { inhXml_figure = False
48 , inhXml_tree0 = []
49 , inhXml_titles = mempty
50 }
51
52 mimetype :: Text -> Maybe Text
53 mimetype "hs" = Just "text/x-haskell"
54 mimetype "sh" = Just "text/x-shellscript"
55 mimetype "shell" = Just "text/x-shellscript"
56 mimetype "shellscript" = Just "text/x-shellscript"
57 mimetype _ = Nothing
58
59 xmlPhantom :: XmlName -> Pos -> XMLs -> XML
60 xmlPhantom n bp = TreeN (Cell bp bp n)
61 xmlPara :: Pos -> XMLs -> XML
62 xmlPara = xmlPhantom "para"
63 xmlTitle :: Pos -> XMLs -> XML
64 xmlTitle = xmlPhantom "title"
65 xmlName :: Pos -> XMLs -> XML
66 xmlName bp (toList -> [Tree0 (unCell -> XmlText t)]) = Tree0 (Cell bp bp $ XmlAttr "name" t)
67 xmlName bp ts = xmlPhantom "name" bp ts
68
69 xmlDocument :: TCTs -> XMLs
70 xmlDocument trees =
71 case Seq.viewl trees of
72 TreeN (unCell -> KeySection{}) vs :< ts ->
73 case spanlTokens vs of
74 (titles@(Seq.viewl -> (Seq.viewl -> Cell bp _ep _ :< _) :< _), vs') ->
75 let vs'' =
76 case Seq.findIndexL
77 (\case
78 TreeN (unCell -> KeyColon "about" _) _ -> True
79 _ -> False) vs' of
80 Just{} -> vs'
81 Nothing -> TreeN (Cell bp bp $ KeyColon "about" "") mempty <| vs'
82 in
83 xmlTCTs inhXml
84 { inhXml_titles = titles
85 , inhXml_figure = True
86 , inhXml_tree0 = List.repeat xmlPara
87 } vs'' <>
88 xmlTCTs inhXml ts
89 _ -> xmlTCTs inhXml trees
90 _ -> xmlTCTs inhXml trees
91
92 xmlTCTs :: InhXml -> TCTs -> XMLs
93 xmlTCTs inh_orig = go inh_orig
94 where
95 go :: InhXml -> TCTs -> XMLs
96 go inh trees =
97 case Seq.viewl trees of
98 TreeN (Cell bp ep (KeyBar n _)) _ :< _
99 | (body,ts) <- spanlBar n trees
100 , not (null body) ->
101 (<| go inh ts) $
102 TreeN (Cell bp ep "artwork") $
103 maybe id (\v -> (Tree0 (Cell bp ep (XmlAttr "type" v)) <|)) (mimetype n) $
104 body >>= xmlTCT inh{inhXml_tree0=[]}
105
106 TreeN key@(unCell -> KeyColon n _) cs :< ts
107 | (cs',ts') <- spanlKeyColon n ts
108 , not (null cs') ->
109 go inh $ TreeN key (cs<>cs') <| ts'
110
111 TreeN (Cell bp ep KeyBrackets{}) _ :< _
112 | (rl,ts) <- spanlBrackets trees
113 , not (null rl) ->
114 (<| go inh ts) $
115 TreeN (Cell bp ep "rl") $
116 rl >>= xmlTCT inh_orig
117
118 _ | (ul,ts) <- spanlItems (==KeyDash) trees
119 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ul ->
120 (<| go inh ts) $
121 TreeN (Cell bp ep "ul") $
122 ul >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
123
124 _ | (ol,ts) <- spanlItems (\case KeyDot{} -> True; _ -> False) trees
125 , TreeN (Cell bp ep _) _ :< _ <- Seq.viewl ol ->
126 (<| go inh ts) $
127 TreeN (Cell bp ep "ol") $
128 ol >>= xmlTCT inh{inhXml_tree0=List.repeat xmlPara}
129
130 t@(Tree0 toks) :< ts | isTokenElem toks ->
131 xmlTCT inh_orig t <>
132 go inh ts
133
134 t@(Tree0 toks) :< ts ->
135 case inhXml_tree0 inh of
136 [] ->
137 xmlTCT inh_orig t <>
138 go inh{inhXml_tree0=[]} ts
139 x:xs ->
140 case Seq.viewl toks of
141 EmptyL -> go inh{inhXml_tree0=xs} ts
142 Cell bp _ep _ :< _ ->
143 (<| go inh{inhXml_tree0=xs} ts) $
144 x bp $
145 xmlTCT inh_orig t
146
147 t:<ts ->
148 xmlTCT inh_orig t <>
149 go inh ts
150
151 _ -> mempty
152
153 xmlTCT :: InhXml -> TCT -> XMLs
154 xmlTCT inh tr =
155 case tr of
156 TreeN (Cell bp ep KeySection{}) ts ->
157 let (attrs,body) = partitionAttributesChildren ts in
158 let inh' = inh
159 { inhXml_tree0 = xmlTitle : List.repeat xmlPara
160 , inhXml_figure = True
161 } in
162 Seq.singleton $
163 TreeN (Cell bp ep "section") $
164 xmlAttrs (defXmlAttr (Cell ep ep ("id", getAttrId body)) attrs) <>
165 xmlTCTs inh' body
166
167 TreeN key@(Cell bp ep (KeyColon kn _)) ts ->
168 let (attrs,body) = partitionAttributesChildren ts in
169 let inh' = inh { inhXml_tree0 =
170 case kn of
171 "about" -> xmlTitle : xmlTitle : List.repeat xmlPara
172 "reference" -> xmlTitle : xmlTitle : List.repeat xmlPara
173 "author" -> List.repeat xmlName
174 _ -> []
175 } in
176 case () of
177 _ | kn == "about" -> xmlAbout inh' key attrs body
178
179 _ | inhXml_figure inh && not (kn`List.elem`elems) ->
180 Seq.singleton $
181 TreeN (Cell bp ep "figure") $
182 xmlAttrs (setXmlAttr (Cell ep ep ("type", kn)) attrs) <>
183 case toList body of
184 [Tree0{}] -> xmlTCTs inh'{inhXml_tree0 = List.repeat xmlPara} body
185 _ -> xmlTCTs inh'{inhXml_tree0 = xmlTitle : List.repeat xmlPara} body
186
187 _ -> Seq.singleton $ xmlKey inh' key attrs body
188
189 TreeN key ts -> Seq.singleton $ xmlKey inh key mempty ts
190
191 Tree0 ts -> xmlTokens ts
192
193 xmlAbout ::
194 InhXml ->
195 Cell Key -> Seq (Cell (XmlName, Text)) ->
196 TCTs -> XMLs
197 xmlAbout inh key attrs body =
198 Seq.singleton $
199 xmlKey inh key attrs $
200 case Seq.viewl (inhXml_titles inh) of
201 (Seq.viewl -> Cell bt _et _ :< _) :< _ ->
202 ((<$> inhXml_titles inh) $ \title ->
203 TreeN (Cell bt bt $ KeyColon "title" "") $
204 Seq.singleton $ Tree0 title)
205 <> body
206 _ -> body
207
208 xmlKey :: InhXml -> Cell Key -> Seq (Cell (XmlName, Text)) -> TCTs -> XML
209 xmlKey inh (Cell bp ep key) attrs ts =
210 case key of
211 KeyColon n _wh -> d_key n
212 KeyGreat n _wh -> d_key n
213 KeyEqual n _wh -> d_key n
214 KeyBar n _wh -> d_key n
215 KeyDot _n -> TreeN (cell "li") $ xmlTCTs inh ts
216 KeyDash -> TreeN (cell "li") $ xmlTCTs inh ts
217 KeyDashDash -> Tree0 $ cell $ XmlComment $ TL.toStrict com
218 where
219 com :: TL.Text
220 com =
221 Write.text Write.config_text $
222 TreeSeq.mapAlsoKey
223 (cell1 . unCell)
224 (\_path -> fmap $ cell1 . unCell) <$> ts
225 KeyLower n as -> TreeN (cell "artwork") $ xmlTCTs inh ts
226 KeyBrackets ident ->
227 let inh' = inh{inhXml_figure = False} in
228 TreeN (cell "reference") $
229 xmlAttrs (setXmlAttr (Cell ep ep ("id", ident)) attrs) <>
230 xmlTCTs inh' ts
231 KeyDotSlash p ->
232 TreeN (cell "include") $
233 xmlAttrs [cell ("href", Text.pack $ FP.replaceExtension p "dtc")] <>
234 xmlTCTs inh ts
235 where
236 cell :: a -> Cell a
237 cell = Cell bp ep
238 d_key :: Text -> XML
239 d_key n =
240 TreeN (cell $ xmlLocalName n) $
241 xmlAttrs attrs <>
242 xmlTCTs inh ts
243
244 xmlTokens :: Tokens -> XMLs
245 xmlTokens tok = goTokens tok
246 where
247 go :: Cell Token -> XMLs
248 go (Cell bp ep tk) =
249 case tk of
250 TokenPlain t -> Seq.singleton $ Tree0 $ cell $ XmlText t
251 TokenTag t -> Seq.singleton $ TreeN (cell "ref") $ xmlAttrs [cell ("to",t)]
252 TokenEscape c -> Seq.singleton $ Tree0 $ cell $ XmlText $ Text.singleton c
253 TokenLink lnk -> Seq.singleton $
254 TreeN (cell "eref") $
255 xmlAttrs [cell ("to",lnk)] |>
256 Tree0 (cell $ XmlText lnk)
257 TokenPair PairBracket ts | to <- Write.textTokens ts
258 , TL.all (\c -> Char.isAlphaNum c || Char.isSymbol c) to ->
259 Seq.singleton $
260 TreeN (cell "rref") $
261 xmlAttrs [cell ("to",TL.toStrict to)]
262 TokenPair PairStar ts -> Seq.singleton $ TreeN (cell "b") $ goTokens ts
263 TokenPair PairSlash ts -> Seq.singleton $ TreeN (cell "i") $ goTokens ts
264 TokenPair PairBackquote ts -> Seq.singleton $ TreeN (cell "code") $ goTokens ts
265 TokenPair PairFrenchquote toks@ts ->
266 Seq.singleton $
267 TreeN (cell "q") $
268 case ts of
269 (Seq.viewl -> Cell bl el (TokenPlain l) :< ls) ->
270 case Seq.viewr ls of
271 m :> Cell br er (TokenPlain r) ->
272 goTokens $
273 Cell bl el (TokenPlain (Text.dropWhile Char.isSpace l))
274 <|(m|>Cell br er (TokenPlain (Text.dropWhileEnd Char.isSpace r)))
275 _ ->
276 goTokens $
277 Cell bl el (TokenPlain (Text.dropAround Char.isSpace l)) <| ls
278 (Seq.viewr -> rs :> Cell br er (TokenPlain r)) ->
279 goTokens $
280 rs |> Cell br er (TokenPlain (Text.dropAround Char.isSpace r))
281 _ -> goTokens toks
282 TokenPair PairHash to ->
283 Seq.singleton $
284 TreeN (cell "ref") $
285 xmlAttrs [cell ("to",TL.toStrict $ Write.textTokens to)]
286 TokenPair (PairElem name attrs) ts ->
287 Seq.singleton $
288 TreeN (cell $ xmlLocalName name) $
289 xmlAttrs (Seq.fromList $ (\(_wh,Attr{..}) -> cell (xmlLocalName attr_name,attr_value)) <$> attrs) <>
290 goTokens ts
291 TokenPair p ts ->
292 let (o,c) = pairBorders p ts in
293 Seq.singleton (Tree0 $ Cell bp bp $ XmlText o) `unionXml`
294 goTokens ts `unionXml`
295 Seq.singleton (Tree0 $ Cell ep ep $ XmlText c)
296 where
297 cell :: a -> Cell a
298 cell = Cell bp ep
299
300 goTokens :: Tokens -> XMLs
301 goTokens toks =
302 case Seq.viewl toks of
303 Cell bp _ep (TokenPair PairParen paren)
304 :< (Seq.viewl -> Cell bb eb (TokenPair PairBracket bracket)
305 :< ts) ->
306 (<| goTokens ts) $
307 case bracket of
308 (toList -> [Cell bl el (TokenLink lnk)]) ->
309 TreeN (Cell bp eb "eref") $
310 xmlAttrs [Cell bl el ("to",lnk)] <>
311 goTokens paren
312 _ ->
313 TreeN (Cell bp eb "rref") $
314 xmlAttrs [Cell bb eb ("to",TL.toStrict $ Write.textTokens bracket)] <>
315 goTokens paren
316 t :< ts -> go t `unionXml` goTokens ts
317 Seq.EmptyL -> mempty
318
319 -- | Unify two 'XMLs', merging border 'XmlText's if any.
320 unionXml :: XMLs -> XMLs -> XMLs
321 unionXml x y =
322 case (Seq.viewr x, Seq.viewl y) of
323 (xs :> x0, y0 :< ys) ->
324 case (x0,y0) of
325 (Tree0 (Cell bx _ex (XmlText tx)), Tree0 (Cell _by ey (XmlText ty))) ->
326 xs `unionXml`
327 Seq.singleton (Tree0 $ Cell bx ey $ XmlText $ tx <> ty) `unionXml`
328 ys
329 _ -> x <> y
330 (Seq.EmptyR, _) -> y
331 (_, Seq.EmptyL) -> x
332
333
334 spanlBar :: Name -> TCTs -> (TCTs, TCTs)
335 spanlBar name = first unKeyBar . spanBar
336 where
337 unKeyBar :: TCTs -> TCTs
338 unKeyBar = (=<<) $ \case
339 TreeN (unCell -> KeyBar{}) ts -> ts
340 _ -> mempty
341 spanBar =
342 Seq.spanl $ \case
343 TreeN (unCell -> KeyBar n _) _ | n == name -> True
344 _ -> False
345
346 spanlItems :: (Key -> Bool) -> TCTs -> (TCTs, TCTs)
347 spanlItems liKey ts =
348 let (lis, ts') = spanLIs ts in
349 foldl' accumLIs (mempty,ts') lis
350 where
351 spanLIs = Seq.spanl $ \case
352 TreeN (unCell -> liKey -> True) _ -> True
353 Tree0 toks ->
354 (`any` toks) $ \case
355 (unCell -> TokenPair (PairElem "li" _) _) -> True
356 _ -> False
357 {-
358 case toList $ Seq.dropWhileR (isTokenWhite . unCell) toks of
359 [unCell -> TokenPair (PairElem "li" _) _] -> True
360 _ -> False
361 -}
362 _ -> False
363 accumLIs acc@(oks,kos) t =
364 case t of
365 TreeN (unCell -> liKey -> True) _ -> (oks|>t,kos)
366 Tree0 toks ->
367 let (ok,ko) =
368 (`Seq.spanl` toks) $ \tok ->
369 case unCell tok of
370 TokenPair (PairElem "li" _) _ -> True
371 TokenPlain txt -> Char.isSpace`Text.all`txt
372 _ -> False in
373 ( if null ok then oks else oks|>Tree0 (rmTokenPlain ok)
374 , if null ko then kos else Tree0 ko<|kos )
375 _ -> acc
376 rmTokenPlain =
377 Seq.filter $ \case
378 (unCell -> TokenPlain{}) -> False
379 _ -> True
380
381 spanlKeyColon :: Name -> TCTs -> (TCTs, TCTs)
382 spanlKeyColon name =
383 Seq.spanl $ \case
384 TreeN (unCell -> KeyBar n _) _ -> n == name
385 TreeN (unCell -> KeyGreat n _) _ -> n == name
386 _ -> False
387
388 spanlBrackets :: TCTs -> (TCTs, TCTs)
389 spanlBrackets =
390 Seq.spanl $ \case
391 TreeN (unCell -> KeyBrackets{}) _ -> True
392 _ -> False
393
394 spanlTokens :: TCTs -> (Seq Tokens, TCTs)
395 spanlTokens =
396 first ((\case
397 Tree0 ts -> ts
398 _ -> undefined) <$>) .
399 Seq.spanl (\case
400 Tree0{} -> True
401 _ -> False)
402
403 getAttrId :: TCTs -> Text
404 getAttrId ts =
405 case Seq.index ts <$> Seq.findIndexL TreeSeq.isTree0 ts of
406 Just (Tree0 toks) -> TL.toStrict $ Write.textTokens toks
407 _ -> ""
408
409 setXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
410 setXmlAttr a@(unCell -> (k, _v)) as =
411 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
412 Just idx -> Seq.update idx a as
413 Nothing -> a <| as
414
415 defXmlAttr :: Cell (XmlName, Text) -> Seq (Cell (XmlName, Text)) -> Seq (Cell (XmlName, Text))
416 defXmlAttr a@(unCell -> (k, _v)) as =
417 case Seq.findIndexL (\(unCell -> (n,_v)) -> n == k) as of
418 Just _idx -> as
419 Nothing -> a <| as
420
421 xmlAttrs :: Seq (Cell (XmlName,Text)) -> XMLs
422 xmlAttrs = ((\(Cell bp ep (n,v)) -> Tree0 (Cell bp ep $ XmlAttr n v)) <$>)
423
424 {-
425 xmlAttr :: XmlAttrs -> (Text,Attr) -> XmlAttrs
426 xmlAttr acc (_,Attr{..}) = Map.insert (xmlLocalName attr_name) attr_value acc
427 -- TODO: conflict
428 -}
429
430 {-
431 d_Attributes :: XmlAttrs -> DTC -> DTC
432 d_Attributes = flip $ Map.foldrWithKey $ \n v ->
433 B.AddCustomAttribute (B.Text n) (B.Text v)
434 -}
435
436 partitionAttributesChildren :: TCTs -> (Seq (Cell (XmlName, Text)), TCTs)
437 partitionAttributesChildren ts = (attrs,cs)
438 where
439 (as,cs) = (`Seq.partition` ts) $ \case
440 TreeN (unCell -> KeyEqual{}) _cs -> True
441 _ -> False
442 attrs = foldl' (\acc a -> acc |> attr a) Seq.empty as
443 attr = \case
444 TreeN (Cell bp ep (KeyEqual n _wh)) a ->
445 Cell bp ep (xmlLocalName n, v)
446 where
447 v = TL.toStrict $
448 Write.text Write.config_text{Write.config_text_escape = False} $
449 TreeSeq.mapAlsoKey (cell1 . unCell) (\_path -> fmap $ cell1 . unCell) <$> a
450 _ -> undefined
451
452 elems :: Set Text
453 elems =
454 [ "about"
455 , "abstract"
456 , "address"
457 , "alias"
458 , "annotation"
459 , "area"
460 , "artwork"
461 , "aside"
462 , "audio"
463 , "author"
464 , "authors"
465 , "bcp14"
466 , "br"
467 , "call"
468 , "city"
469 , "code"
470 , "comment"
471 , "comments"
472 , "country"
473 , "date"
474 , "dd"
475 , "define"
476 , "del"
477 , "div"
478 , "dl"
479 , "document"
480 , "dt"
481 , "editor"
482 , "email"
483 , "embed"
484 , "eref"
485 , "fax"
486 , "feed"
487 , "feedback"
488 , "figure"
489 , "filter"
490 , "format"
491 , "from"
492 , "h"
493 , "hi"
494 , "html5"
495 , "i"
496 , "index"
497 , "iref"
498 , "keyword"
499 , "li"
500 , "link"
501 , "name"
502 , "note"
503 , "ol"
504 , "organization"
505 , "para"
506 , "postamble"
507 , "preamble"
508 , "q"
509 , "ref"
510 , "reference"
511 , "region"
512 , "rl"
513 , "rref"
514 , "sc"
515 , "section"
516 , "serie"
517 , "source"
518 , "span"
519 , "street"
520 , "style"
521 , "sub"
522 , "sup"
523 , "table"
524 , "tbody"
525 , "td"
526 , "tel"
527 , "tfoot"
528 , "title"
529 , "th"
530 , "thead"
531 , "toc"
532 , "tof"
533 , "tr"
534 , "tt"
535 , "ul"
536 , "uri"
537 , "version"
538 , "video"
539 , "workgroup"
540 , "xml"
541 , "zipcode"
542 ]