]> Git — Sourcephile - julm/worksheets.git/blob - src/Worksheets/Writing/Rosetta.hs
feat(Rosetta): config combinators
[julm/worksheets.git] / src / Worksheets / Writing / Rosetta.hs
1 {-# LANGUAGE OverloadedLists #-}
2
3 module Worksheets.Writing.Rosetta where
4
5 import Data.ByteString.Builder (Builder)
6 import Data.Char qualified as Char
7 import Data.List qualified as List
8 import Data.Map.Strict qualified as Map
9 import Data.Text qualified as Text
10 import Data.Text.Short qualified as ShortText
11 import System.FilePath.Posix ((</>))
12 import System.FilePath.Posix qualified as File
13 import Text.Blaze
14 import Text.Blaze.Html5 qualified as H
15 import Text.Blaze.Html5.Attributes qualified as HA
16 import Text.Blaze.Renderer.Utf8 qualified as Blaze
17 import Prelude (error)
18
19 import Language.Chinese (ChineseDict (..), ChineseDictEntries (..))
20 import Language.Chinese qualified
21 import Paths_worksheets qualified as Self
22 import Utils.Char as Char
23 import Utils.HTML
24 import Utils.Prelude
25
26 data Langue
27 = LangueFrançais
28 | LangueAnglais
29 | LangueMandarin
30 | LangueMandarinPinyin
31 | LanguePhonetic
32 deriving (Eq, Ord, Show)
33 instance HasTypeDefault Langue where
34 typeDefault = LangueAnglais
35
36 data Length
37 = LengthFractionalRatio Natural
38 | LengthMillimeters Double
39 deriving (Eq, Show)
40 instance ToCSS Length where
41 toCSS = \case
42 LengthFractionalRatio x -> show x <> "fr"
43 LengthMillimeters x -> show x <> "mm"
44 cm :: Double -> Length
45 cm = LengthMillimeters . (* 10)
46 mm :: Double -> Length
47 mm = LengthMillimeters
48 fr :: Natural -> Length
49 fr = LengthFractionalRatio
50
51 data RosettaDifficulty = RosettaDifficulty
52 { rosettaDifficultyCharWidth :: Length
53 , rosettaDifficultyCharHeight :: Length
54 , rosettaDifficultyWordSpacing :: Length
55 , rosettaDifficultyFontSize :: Length
56 , rosettaDifficultyHiddenPatterns :: Bool
57 }
58 deriving (Eq, Show, Generic)
59 instance HasTypeDefault RosettaDifficulty where
60 typeDefault =
61 RosettaDifficulty
62 { rosettaDifficultyCharWidth = 1 & cm
63 , rosettaDifficultyCharHeight = 1 & cm
64 , rosettaDifficultyWordSpacing = 1 & cm
65 , rosettaDifficultyFontSize = 1 & cm
66 , rosettaDifficultyHiddenPatterns = False
67 }
68
69 data ObjectFit
70 = ObjectFitContain
71 | ObjectFitScaleDown
72 | ObjectFitCover
73 | ObjectFitFill
74 deriving (Eq, Ord, Show, Generic)
75 instance HasTypeDefault ObjectFit where
76 typeDefault = ObjectFitContain
77 instance ToCSS ObjectFit where
78 toCSS = \case
79 ObjectFitContain -> "contain"
80 ObjectFitScaleDown -> "scale-down"
81 ObjectFitCover -> "cover"
82 ObjectFitFill -> "fill"
83
84 data RosettaPart = RosettaPart
85 { rosettaPartPicture :: File.FilePath
86 , rosettaPartPictureFit :: ObjectFit
87 , rosettaPartDescription :: ShortText
88 , rosettaPartText :: ShortText
89 , rosettaPartLangue :: Langue
90 }
91 deriving (Eq, Ord, Show, Generic)
92 deriving (HasTypeDefault) via (Generically RosettaPart)
93
94 data PageSize
95 = PageSizeA5
96 | PageSizeA4
97 | PageSizeA3
98 deriving (Eq, Ord, Show, Generic)
99 instance HasTypeDefault PageSize where
100 typeDefault = PageSizeA4
101
102 data PageOrientation
103 = PageOrientationPortrait
104 | PageOrientationLandscape
105 deriving (Eq, Ord, Show, Generic)
106 instance HasTypeDefault PageOrientation where
107 typeDefault = PageOrientationLandscape
108
109 data Rosetta = Rosetta
110 { rosettaParts :: [RosettaPart]
111 , rosettaColumnPictureWidth :: Length
112 , rosettaColumnTextWidth :: Length
113 , rosettaPageSize :: PageSize
114 , rosettaPageOrientation :: PageOrientation
115 , rosettaDifficulties :: RosettaDifficulties
116 }
117 deriving (Eq, Show, Generic)
118
119 instance HasTypeDefault Rosetta where
120 typeDefault =
121 Rosetta
122 { rosettaParts = []
123 , rosettaColumnPictureWidth = 1 & fr
124 , rosettaColumnTextWidth = 1 & fr
125 , rosettaPageSize = typeDefault
126 , rosettaPageOrientation = typeDefault
127 , rosettaDifficulties = typeDefault
128 }
129
130 rosettaPortrait m =
131 m
132 { rosettaPageOrientation = PageOrientationPortrait
133 , rosettaColumnPictureWidth = 3 & fr
134 , rosettaColumnTextWidth = 4 & fr
135 }
136 rosettaLandscape m =
137 m
138 { rosettaPageOrientation = PageOrientationLandscape
139 , rosettaColumnPictureWidth = 1 & fr
140 , rosettaColumnTextWidth = 2 & fr
141 }
142
143 type RosettaDifficulties = Map UnicodeBlock RosettaDifficulty
144
145 rosettaDifficultiesLatinBig :: Modifier RosettaDifficulties
146 rosettaDifficultiesLatinBig =
147 Map.unionWith (const mod)
148 $ [ Char.UnicodeBlockLatin latin := typeDefault & mod
149 | latin <- enumAll
150 ]
151 & fromList
152 where
153 mod v =
154 v
155 { rosettaDifficultyCharWidth = 1 & cm
156 , rosettaDifficultyCharHeight = 1 & cm
157 , rosettaDifficultyWordSpacing = 0.5 & cm
158 , rosettaDifficultyFontSize = 0.90 & cm
159 }
160
161 mapMod keys mod =
162 Map.unionWith (const mod)
163 $ keys
164 & Map.fromSet (const $ typeDefault & mod)
165
166 rosettaDifficultiesCJKBig :: Modifier RosettaDifficulties
167 rosettaDifficultiesCJKBig =
168 mapMod unicodeBlockCJK \v ->
169 v
170 { rosettaDifficultyCharWidth = 1.35 & cm
171 , rosettaDifficultyCharHeight = 1.35 & cm
172 , rosettaDifficultyWordSpacing = 0.5 & cm
173 , rosettaDifficultyFontSize = 1.25 & cm
174 }
175 rosettaDifficultiesLatinHidden :: Modifier RosettaDifficulties
176 rosettaDifficultiesLatinHidden =
177 mapMod unicodeBlockLatin \v ->
178 v{rosettaDifficultyHiddenPatterns = True}
179
180 rosettaDifficultiesCJKHidden :: Modifier RosettaDifficulties
181 rosettaDifficultiesCJKHidden =
182 mapMod unicodeBlockCJK \v ->
183 v{rosettaDifficultyHiddenPatterns = True}
184
185 lookupPinyins :: ChineseDict -> ShortText -> [ShortText]
186 lookupPinyins (ChineseDict dict) word =
187 word
188 & (`Map.lookup` dict)
189 & fromMaybe (error $ "lookupPinyins: no entry for: " <> wordString)
190 & chinesePinyins
191 & (\ps -> if null ps then error $ "lookupPinyins: empty entry for: " <> wordString else ps)
192 where
193 wordString = word & ShortText.unpack
194
195 data Token = Token
196 { tokenText :: ShortText
197 , tokenMeta :: (Char.GeneralCategory, Maybe UnicodeBlock)
198 }
199 deriving (Eq, Ord, Show)
200
201 rosettaTokenizer :: ShortText -> [Token]
202 rosettaTokenizer s = s & ShortText.unpack & group
203 where
204 group [] = []
205 group (inpHead : inpTail) = tok : group rest
206 where
207 tok =
208 Token
209 { tokenText = inpHead : txt & ShortText.pack
210 , tokenMeta
211 }
212 tokenMeta =
213 ( inpHead & Char.generalCategory
214 , inpHead & unicodeBlock
215 )
216 (txt, rest) =
217 inpTail & List.span \c ->
218 (Char.generalCategory c, unicodeBlock c) == tokenMeta
219
220 groupByHoriz :: [Token] -> [[Token]]
221 groupByHoriz = group
222 where
223 group [] = []
224 group (inpHead : inpTail) =
225 case inpHead of
226 Token{tokenMeta = (Char.Space, _)} -> group rest
227 where
228 (_skipSpaces, rest) = inpTail & List.span onSep
229 tok -> (tok : nonSeps) : group rest
230 where
231 (nonSeps, rest) = inpTail & List.break onSep
232 where
233 onSep = \case
234 Token{tokenText, tokenMeta = (Char.Space, _)}
235 | tokenText & ShortText.unpack & all (== '\xA0') -> False
236 | otherwise -> True
237 _ -> False
238 splitWords :: [Token] -> [[Token]]
239 splitWords = group
240 where
241 group :: [Token] -> [[Token]]
242 group [] = []
243 group (inpHead : inpTail) =
244 case inpHead of
245 Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> group rest
246 where
247 (_skipSpaces, rest) = inpTail & List.span onSep
248 tok -> (tok : nonSeps) : group rest
249 where
250 (nonSeps, rest) = inpTail & List.break onSep
251 where
252 onSep = \case
253 Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> True
254 _ -> False
255
256 -- | CorrectnessNote: beware than the tokenMeta is just preserved,
257 -- it does not correspond to the pronunciation unicode code points.
258 rosettaWordPonunciations :: ChineseDict -> [Token] -> [Token]
259 rosettaWordPonunciations chineseDict toks =
260 toks & List.concatMap \tok ->
261 let tokText = tok & tokenText
262 in let tokString = tokText & ShortText.unpack
263 in case tok & tokenMeta of
264 (_, Just UnicodeBlockCJK{}) -> pinyins <&> \tokenText -> tok{tokenText}
265 where
266 pinyins :: [ShortText]
267 pinyins
268 | tokString & all Char.isNumber =
269 tokString & List.concatMap \char ->
270 char & ShortText.singleton & lookupPinyins chineseDict
271 | List.length tokTextPins == ShortText.length tokText = tokTextPins
272 | otherwise = error "rosettaWordPonunciations: pinyins length mismatch"
273 tokTextPins = tokText & lookupPinyins chineseDict
274 (_, _) -> tokString <&> \_c -> tok{tokenText = ""}
275
276 rosettaWordChars :: [Token] -> [Token]
277 rosettaWordChars toks =
278 toks & List.concatMap \tok ->
279 let tokText = tok & tokenText
280 in let tokString = tokText & ShortText.unpack
281 in tokString <&> \char ->
282 tok{tokenText = char & ShortText.singleton}
283
284 rosettaHTML :: ChineseDict -> Text -> Rosetta -> IO Builder
285 rosettaHTML chineseDict title Rosetta{..} = do
286 -- FIXME: this absolute path is not portable out of my system
287 dataPath <- Self.getDataDir <&> File.normalise
288 return $ Blaze.renderMarkupBuilder do
289 H.docTypeHtml do
290 let (pageWidth, pageHeight) = case rosettaPageOrientation of
291 PageOrientationLandscape -> (29.7 & cm, 21.0 & cm)
292 PageOrientationPortrait -> (21.0 & cm, 29.7 & cm)
293 H.head do
294 H.title $ title & H.toHtml
295 H.link
296 ! HA.rel "stylesheet"
297 ! HA.type_ "text/css"
298 ! HA.href (dataPath </> "styles/rosetta.css" & toValue)
299 -- rosettaDifficulties
300 styleCSS
301 $ fromList
302 [ [ [".script-" <> className uniScript]
303 := [ "width" := rosettaDifficultyCharWidth & toCSS
304 , "height" := rosettaDifficultyCharHeight & toCSS
305 , "font-size" := rosettaDifficultyFontSize & toCSS
306 ]
307 , [".writing-words"] := ["column-gap" := rosettaDifficultyWordSpacing & toCSS]
308 , [".writing-words-horiz"] := ["column-gap" := rosettaDifficultyWordSpacing & toCSS]
309 ]
310 | (uniScript, RosettaDifficulty{..}) <- rosettaDifficulties & toList
311 ]
312 & mconcat
313 -- print
314 styleCSS
315 $ fromList
316 [ ["@media print", node]
317 := [ "width" := pageWidth & toCSS
318 , "height" := pageHeight & toCSS
319 ]
320 | node <- ["html", "body"]
321 ]
322 styleCSS
323 [ ["@page"]
324 := [ "size"
325 := List.unwords
326 [ case rosettaPageSize of
327 PageSizeA5 -> "A5"
328 PageSizeA4 -> "A4"
329 PageSizeA3 -> "A3"
330 , case rosettaPageOrientation of
331 PageOrientationPortrait -> "portrait"
332 PageOrientationLandscape -> "landscape"
333 ]
334 ]
335 ]
336 H.body do
337 "\n"
338 let numOfParts = rosettaParts & List.length
339 H.div
340 ! classes ["main-page"]
341 $ do
342 H.div
343 ! classes
344 [ "rosetta"
345 , "sub-page"
346 , "page-" <> className rosettaPageSize <> "-" <> className rosettaPageOrientation
347 ]
348 ! styles
349 [ "grid-template-columns" := [rosettaColumnPictureWidth, rosettaColumnTextWidth] <&> toCSS & List.unwords
350 , "grid-template-rows" := "1fr" & List.replicate numOfParts & List.unwords
351 ]
352 $ do
353 forM_ rosettaParts \RosettaPart{..} -> do
354 "\n"
355 H.div ! classes ["rosetta-cell", "rosetta-cell-picture"] $ do
356 unless (rosettaPartDescription & ShortText.null) do
357 H.span ! classes ["rosetta-cell-picture-description"] $ do
358 rosettaPartDescription & H.toHtml
359 H.img
360 ! styles ["object-fit" := rosettaPartPictureFit & toCSS]
361 ! HA.src ("file://" <> dataPath </> "images" </> rosettaPartPicture & toValue)
362 H.div ! classes ["rosetta-cell", "writing-words"] $ do
363 forM_ (rosettaPartText & rosettaTokenizer & groupByHoriz) \writingHoriz -> do
364 "\n"
365 H.div
366 ! classes
367 [ "writing-words-horiz"
368 ]
369 $ do
370 forM_ (writingHoriz & splitWords) \writingWord -> do
371 -- traceShowM ("writingWord"::String, writingWord)
372 let wordRow = writingWord & rosettaWordChars
373 H.div
374 ! classes
375 [ "writing-words-word"
376 , "lang-" <> className rosettaPartLangue
377 ]
378 ! styles
379 [ "grid-template-columns"
380 := [ width & toCSS
381 | Token{tokenMeta = (_, fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) -> uniScript)} <- wordRow
382 , let width =
383 rosettaDifficulties
384 & lookupOrTypeDefault uniScript
385 & rosettaDifficultyCharWidth
386 ]
387 & List.unwords
388 ]
389 $ do
390 let wordPronunciations = writingWord & rosettaWordPonunciations chineseDict
391 unless (wordPronunciations & all (tokenText >>> ShortText.null)) do
392 forM_ wordPronunciations \cellToken -> do
393 H.div
394 ! classes
395 [ "writing-words-cell"
396 , "pronunciation"
397 ]
398 $ do
399 cellToken
400 & tokenText
401 & Language.Chinese.numberedPinyinToDiacriticPiniyn
402 & ShortText.toText
403 & Text.toLower
404 & H.toHtml
405 forM_ (["model", "input"] :: [String]) \rowKind -> do
406 forM_ wordRow \cellToken -> do
407 let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
408 H.div
409 ! classes
410 [ "writing-words-cell"
411 , rowKind
412 , "script-" <> className uniScript
413 , if rowKind
414 == "input"
415 && ( rosettaDifficulties
416 & lookupOrTypeDefault uniScript
417 & rosettaDifficultyHiddenPatterns
418 )
419 then "hidden"
420 else mempty
421 ]
422 $ do
423 cellToken & tokenText & H.toHtml