]> Git — Sourcephile - julm/worksheets.git/blob - src/Worksheets/Writing/Rosetta.hs
update
[julm/worksheets.git] / src / Worksheets / Writing / Rosetta.hs
1 {-# LANGUAGE OverloadedLists #-}
2
3 module Worksheets.Writing.Rosetta where
4
5 import Data.ByteString.Builder (Builder)
6 import Data.Char qualified as Char
7 import Data.List qualified as List
8 import Data.Map.Strict qualified as Map
9 import Data.Text qualified as Text
10 import Data.Text.Short qualified as ShortText
11 import System.FilePath.Posix ((</>))
12 import System.FilePath.Posix qualified as File
13 import Text.Blaze
14 import Text.Blaze.Html5 qualified as H
15 import Text.Blaze.Html5.Attributes qualified as HA
16 import Text.Blaze.Renderer.Utf8 qualified as Blaze
17 import Prelude (error)
18
19 import Language.Chinese (ChineseDict (..), ChineseDictEntries (..))
20 import Language.Chinese qualified
21 import Paths_worksheets qualified as Self
22 import Utils.Char as Char
23 import Utils.HTML
24 import Utils.Prelude
25
26 data Langue
27 = LangueFrançais
28 | LangueAnglais
29 | LangueMandarin
30 | LangueMandarinPinyin
31 | LanguePhonetic
32 deriving (Eq, Ord, Show)
33 instance HasTypeDefault Langue where
34 typeDefault = LangueAnglais
35
36 data Length
37 = LengthFractionalRatio Natural
38 | LengthMillimeters Double
39 deriving (Eq, Show)
40 instance ToCSS Length where
41 toCSS = \case
42 LengthFractionalRatio x -> show x <> "fr"
43 LengthMillimeters x -> show x <> "mm"
44 cm :: Double -> Length
45 cm = LengthMillimeters . (* 10)
46 mm :: Double -> Length
47 mm = LengthMillimeters
48 fr :: Natural -> Length
49 fr = LengthFractionalRatio
50
51 data RosettaDifficulty = RosettaDifficulty
52 { rosettaDifficultyCharWidth :: Length
53 , rosettaDifficultyCharHeight :: Length
54 , rosettaDifficultyWordSpacing :: Length
55 , rosettaDifficultyFontSize :: Length
56 , rosettaDifficultyHiddenPatterns :: Bool
57 }
58 deriving (Eq, Show, Generic)
59 instance HasTypeDefault RosettaDifficulty where
60 typeDefault =
61 RosettaDifficulty
62 { rosettaDifficultyCharWidth = 1 & cm
63 , rosettaDifficultyCharHeight = 1 & cm
64 , rosettaDifficultyWordSpacing = 1 & cm
65 , rosettaDifficultyFontSize = 1 & cm
66 , rosettaDifficultyHiddenPatterns = False
67 }
68
69 cssBlockObjectFitCover :: CSSBlock
70 cssBlockObjectFitCover = ["object-fit" := "cover"]
71
72 data RosettaPart = RosettaPart
73 { rosettaPartPicture :: File.FilePath
74 , rosettaPartPictureCSS :: CSSBlock
75 , rosettaPartDescription :: ShortText
76 , rosettaPartText :: ShortText
77 , rosettaPartLangue :: Langue
78 }
79 deriving (Eq, Ord, Show, Generic)
80 deriving (HasTypeDefault) via (Generically RosettaPart)
81
82 data PageSize
83 = PageSizeA5
84 | PageSizeA4
85 | PageSizeA3
86 deriving (Eq, Ord, Show, Generic)
87 instance HasTypeDefault PageSize where
88 typeDefault = PageSizeA4
89
90 data PageOrientation
91 = PageOrientationPortrait
92 | PageOrientationLandscape
93 deriving (Eq, Ord, Show, Generic)
94 instance HasTypeDefault PageOrientation where
95 typeDefault = PageOrientationLandscape
96
97 data Rosetta = Rosetta
98 { rosettaParts :: [RosettaPart]
99 , rosettaColumnPictureWidth :: Length
100 , rosettaColumnTextWidth :: Length
101 , rosettaPageSize :: PageSize
102 , rosettaPageOrientation :: PageOrientation
103 , rosettaDifficulties :: RosettaDifficulties
104 }
105 deriving (Eq, Show, Generic)
106
107 instance HasTypeDefault Rosetta where
108 typeDefault =
109 Rosetta
110 { rosettaParts = []
111 , rosettaColumnPictureWidth = 1 & fr
112 , rosettaColumnTextWidth = 1 & fr
113 , rosettaPageSize = typeDefault
114 , rosettaPageOrientation = typeDefault
115 , rosettaDifficulties = typeDefault
116 }
117
118 rosettaPortrait m =
119 m
120 { rosettaPageOrientation = PageOrientationPortrait
121 , rosettaColumnPictureWidth = 3 & fr
122 , rosettaColumnTextWidth = 4 & fr
123 }
124 rosettaLandscape m =
125 m
126 { rosettaPageOrientation = PageOrientationLandscape
127 , rosettaColumnPictureWidth = 1 & fr
128 , rosettaColumnTextWidth = 2 & fr
129 }
130
131 type RosettaDifficulties = Map UnicodeBlock RosettaDifficulty
132
133 rosettaDifficultiesLatinBig =
134 mapInsertManyWithTypeDefault Char.unicodeBlockLatin \v ->
135 v
136 { rosettaDifficultyCharWidth = 1 & cm
137 , rosettaDifficultyCharHeight = 1 & cm
138 , rosettaDifficultyWordSpacing = 0.5 & cm
139 , rosettaDifficultyFontSize = 0.90 & cm
140 }
141
142 rosettaDifficultiesCJKBig :: Modifier RosettaDifficulties
143 rosettaDifficultiesCJKBig =
144 mapInsertManyWithTypeDefault unicodeBlockCJK \v ->
145 v
146 { rosettaDifficultyCharWidth = 1.50 & cm
147 , rosettaDifficultyCharHeight = 1.50 & cm
148 , rosettaDifficultyWordSpacing = 0.5 & cm
149 , rosettaDifficultyFontSize = 1.40 & cm
150 }
151 rosettaDifficultiesLatinHidden :: Modifier RosettaDifficulties
152 rosettaDifficultiesLatinHidden =
153 mapInsertManyWithTypeDefault unicodeBlockLatin \v ->
154 v{rosettaDifficultyHiddenPatterns = True}
155
156 rosettaDifficultiesCJKHidden :: Modifier RosettaDifficulties
157 rosettaDifficultiesCJKHidden =
158 mapInsertManyWithTypeDefault unicodeBlockCJK \v ->
159 v{rosettaDifficultyHiddenPatterns = True}
160
161 lookupPinyins :: ChineseDict -> ShortText -> [ShortText]
162 lookupPinyins (ChineseDict dict) word =
163 word
164 & (`Map.lookup` dict)
165 & fromMaybe (error $ "lookupPinyins: no entry for: " <> wordString)
166 & chinesePinyins
167 & (\ps -> if null ps then error $ "lookupPinyins: empty entry for: " <> wordString else ps)
168 where
169 wordString = word & ShortText.unpack
170
171 data Token = Token
172 { tokenText :: ShortText
173 , tokenMeta :: (Char.GeneralCategory, Maybe UnicodeBlock)
174 }
175 deriving (Eq, Ord, Show)
176
177 rosettaTokenizer :: ShortText -> [Token]
178 rosettaTokenizer s = s & ShortText.unpack & group
179 where
180 group [] = []
181 group (inpHead : inpTail) = tok : group rest
182 where
183 tok =
184 Token
185 { tokenText = inpHead : txt & ShortText.pack
186 , tokenMeta
187 }
188 tokenMeta =
189 ( inpHead & Char.generalCategory
190 , inpHead & unicodeBlock
191 )
192 (txt, rest) =
193 inpTail & List.span \c ->
194 (Char.generalCategory c, unicodeBlock c) == tokenMeta
195
196 groupByHoriz :: [Token] -> [[Token]]
197 groupByHoriz = group
198 where
199 group [] = []
200 group (inpHead : inpTail) =
201 case inpHead of
202 Token{tokenMeta = (Char.Space, _)} -> group rest
203 where
204 (_skipSpaces, rest) = inpTail & List.span onSep
205 tok -> (tok : nonSeps) : group rest
206 where
207 (nonSeps, rest) = inpTail & List.break onSep
208 where
209 onSep = \case
210 Token{tokenText, tokenMeta = (Char.Space, _)}
211 | tokenText & ShortText.unpack & all (== '\xA0') -> False
212 | otherwise -> True
213 _ -> False
214 splitWords :: [Token] -> [[Token]]
215 splitWords = group
216 where
217 group :: [Token] -> [[Token]]
218 group [] = []
219 group (inpHead : inpTail) =
220 case inpHead of
221 Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> group rest
222 where
223 (_skipSpaces, rest) = inpTail & List.span onSep
224 tok -> (tok : nonSeps) : group rest
225 where
226 (nonSeps, rest) = inpTail & List.break onSep
227 where
228 onSep = \case
229 Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> True
230 _ -> False
231
232 -- | CorrectnessNote: beware than the tokenMeta is just preserved,
233 -- it does not correspond to the pronunciation unicode code points.
234 rosettaWordPonunciations :: ChineseDict -> [Token] -> [Token]
235 rosettaWordPonunciations chineseDict toks =
236 toks & List.concatMap \tok ->
237 let tokText = tok & tokenText
238 in let tokString = tokText & ShortText.unpack
239 in case tok & tokenMeta of
240 (_, Just UnicodeBlockCJK{}) -> pinyins <&> \tokenText -> tok{tokenText}
241 where
242 pinyins :: [ShortText]
243 pinyins
244 | tokString & all Char.isNumber =
245 tokString & List.concatMap \char ->
246 char & ShortText.singleton & lookupPinyins chineseDict
247 | List.length tokTextPins == ShortText.length tokText = tokTextPins
248 | otherwise = error "rosettaWordPonunciations: pinyins length mismatch"
249 tokTextPins = tokText & lookupPinyins chineseDict
250 (_, _) -> tokString <&> \_c -> tok{tokenText = ""}
251
252 rosettaWordChars :: [Token] -> [Token]
253 rosettaWordChars toks =
254 toks & List.concatMap \tok ->
255 let tokText = tok & tokenText
256 in let tokString = tokText & ShortText.unpack
257 in tokString <&> \char ->
258 tok{tokenText = char & ShortText.singleton}
259
260 rosettaHTML :: ChineseDict -> Text -> Rosetta -> IO Builder
261 rosettaHTML chineseDict title Rosetta{..} = do
262 -- FIXME: this absolute path is not portable out of my system
263 dataPath <- Self.getDataDir <&> File.normalise
264 return $ Blaze.renderMarkupBuilder do
265 H.docTypeHtml do
266 let (pageWidth, pageHeight) = case rosettaPageOrientation of
267 PageOrientationLandscape -> (29.7 & cm, 21.0 & cm)
268 PageOrientationPortrait -> (21.0 & cm, 29.7 & cm)
269 H.head do
270 H.title $ title & H.toHtml
271 H.link
272 ! HA.rel "stylesheet"
273 ! HA.type_ "text/css"
274 ! HA.href (dataPath </> "styles/rosetta.css" & toValue)
275 -- rosettaDifficulties
276 styleCSS
277 $ fromList
278 [ [ [".script-" <> className uniScript]
279 := [ "width" := rosettaDifficultyCharWidth & toCSS
280 , "height" := rosettaDifficultyCharHeight & toCSS
281 , "font-size" := rosettaDifficultyFontSize & toCSS
282 ]
283 , [".writing-words"] := ["column-gap" := rosettaDifficultyWordSpacing & toCSS]
284 , [".writing-words-horiz"] := ["column-gap" := rosettaDifficultyWordSpacing & toCSS]
285 ]
286 | (uniScript, RosettaDifficulty{..}) <- rosettaDifficulties & toList
287 ]
288 & mconcat
289 -- print
290 styleCSS
291 $ fromList
292 [ ["@media print", node]
293 := [ "width" := pageWidth & toCSS
294 , "height" := pageHeight & toCSS
295 ]
296 | node <- ["html", "body"]
297 ]
298 styleCSS
299 [ ["@page"]
300 := [ "size"
301 := List.unwords
302 [ case rosettaPageSize of
303 PageSizeA5 -> "A5"
304 PageSizeA4 -> "A4"
305 PageSizeA3 -> "A3"
306 , case rosettaPageOrientation of
307 PageOrientationPortrait -> "portrait"
308 PageOrientationLandscape -> "landscape"
309 ]
310 ]
311 ]
312 H.body do
313 "\n"
314 let numOfParts = rosettaParts & List.length
315 H.div
316 ! classes ["main-page"]
317 $ do
318 H.div
319 ! classes
320 [ "rosetta"
321 , "sub-page"
322 , "page-" <> className rosettaPageSize <> "-" <> className rosettaPageOrientation
323 ]
324 ! styles
325 [ "grid-template-columns" := [rosettaColumnPictureWidth, rosettaColumnTextWidth] <&> toCSS & List.unwords
326 , "grid-template-rows" := "1fr" & List.replicate numOfParts & List.unwords
327 ]
328 $ do
329 forM_ rosettaParts \RosettaPart{..} -> do
330 "\n"
331 H.div ! classes ["rosetta-cell", "rosetta-cell-picture"] $ do
332 unless (rosettaPartDescription & ShortText.null) do
333 H.span ! classes ["rosetta-cell-picture-description"] $ do
334 rosettaPartDescription & H.toHtml
335 H.img
336 ! styles rosettaPartPictureCSS
337 ! HA.src ("file://" <> dataPath </> "images" </> rosettaPartPicture & toValue)
338 H.div ! classes ["rosetta-cell", "writing-words"] $ do
339 forM_ (rosettaPartText & rosettaTokenizer & groupByHoriz) \writingHoriz -> do
340 "\n"
341 H.div
342 ! classes
343 [ "writing-words-horiz"
344 ]
345 $ do
346 forM_ (writingHoriz & splitWords) \writingWord -> do
347 -- traceShowM ("writingWord"::String, writingWord)
348 let wordRow = writingWord & rosettaWordChars
349 H.div
350 ! classes
351 [ "writing-words-word"
352 , "lang-" <> className rosettaPartLangue
353 ]
354 ! styles
355 [ "grid-template-columns"
356 := [ width & toCSS
357 | Token{tokenMeta = (_, fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) -> uniScript)} <- wordRow
358 , let width =
359 rosettaDifficulties
360 & lookupOrTypeDefault uniScript
361 & rosettaDifficultyCharWidth
362 ]
363 & List.unwords
364 ]
365 $ do
366 let wordPronunciations = writingWord & rosettaWordPonunciations chineseDict
367 unless (wordPronunciations & all (tokenText >>> ShortText.null)) do
368 forM_ wordPronunciations \cellToken -> do
369 H.div
370 ! classes
371 [ "writing-words-cell"
372 , "pronunciation"
373 ]
374 $ do
375 cellToken
376 & tokenText
377 & Language.Chinese.numberedPinyinToDiacriticPiniyn
378 & ShortText.toText
379 & Text.toLower
380 & H.toHtml
381 forM_ (["model", "input"] :: [String]) \rowKind -> do
382 forM_ wordRow \cellToken -> do
383 let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
384 H.div
385 ! classes
386 [ "writing-words-cell"
387 , rowKind
388 , "script-" <> className uniScript
389 , if rowKind
390 == "input"
391 && ( rosettaDifficulties
392 & lookupOrTypeDefault uniScript
393 & rosettaDifficultyHiddenPatterns
394 )
395 then "hidden"
396 else mempty
397 ]
398 $ do
399 cellToken & tokenText & H.toHtml