]> Git — Sourcephile - julm/worksheets.git/blob - src/Worksheets/Writing/Rosetta.hs
feat: Rosetta
[julm/worksheets.git] / src / Worksheets / Writing / Rosetta.hs
1 {-# LANGUAGE OverloadedLists #-}
2
3 module Worksheets.Writing.Rosetta where
4
5 import Data.ByteString.Builder (Builder)
6 import Data.Char qualified as Char
7 import Data.List qualified as List
8 import Data.Map.Strict qualified as Map
9 import Data.Text qualified as Text
10 import Data.Text.Short qualified as ShortText
11 import System.FilePath.Posix ((</>))
12 import System.FilePath.Posix qualified as File
13 import Text.Blaze
14 import Text.Blaze.Html5 qualified as H
15 import Text.Blaze.Html5.Attributes qualified as HA
16 import Text.Blaze.Renderer.Utf8 qualified as Blaze
17 import Prelude (error)
18
19 import Language.Chinese (ChineseDict (..), ChineseDictEntries (..))
20 import Language.Chinese qualified
21 import Paths_worksheets qualified as Self
22 import Utils.Char
23 import Utils.HTML
24 import Utils.Prelude
25
26 data Langue
27 = LangueFrançais
28 | LangueAnglais
29 | LangueMandarin
30 | LangueMandarinPinyin
31 | LanguePhonetic
32 deriving (Eq, Ord, Show)
33
34 type FractionalRatio = Double
35 type Centimeter = Double
36 data RosettaDifficulty = RosettaDifficulty
37 { rosettaDifficultyCharWidth :: Centimeter
38 , rosettaDifficultyCharHeight :: Centimeter
39 , rosettaDifficultyFontSize :: Centimeter
40 , rosettaDifficultyHiddenPatterns :: Bool
41 }
42 deriving (Eq, Ord, Show, Generic)
43 data RosettaPart = RosettaPart
44 { rosettaPartPicture :: File.FilePath
45 , rosettaPartDescription :: ShortText
46 , rosettaPartText :: ShortText
47 , rosettaPartLangue :: Langue
48 }
49 deriving (Eq, Ord, Show, Generic)
50
51 data PageSize
52 = PageSizeA5
53 | PageSizeA4
54 | PageSizeA3
55 deriving (Eq, Ord, Show, Generic)
56
57 data PageOrientation
58 = PageOrientationPortrait
59 | PageOrientationLandscape
60 deriving (Eq, Ord, Show, Generic)
61
62 data Rosetta = Rosetta
63 { rosettaParts :: [RosettaPart]
64 , -- , rosettaClasses :: [String]
65 rosettaColumnPictureWidth :: FractionalRatio
66 , rosettaColumnTextWidth :: FractionalRatio
67 , rosettaPageSize :: PageSize
68 , rosettaPageOrientation :: PageOrientation
69 , rosettaDifficulties :: Map UnicodeBlock RosettaDifficulty
70 }
71 deriving (Eq, Ord, Show, Generic)
72
73 lookupPinyins :: ChineseDict -> ShortText -> [ShortText]
74 lookupPinyins (ChineseDict dict) word =
75 word
76 & (`Map.lookup` dict)
77 & fromMaybe (error $ "lookupPinyins: no entry for: " <> wordString)
78 & chinesePinyins
79 & (\ps -> if null ps then error $ "lookupPinyins: empty entry for: " <> wordString else ps)
80 where
81 wordString = word & ShortText.unpack
82
83 cm :: Double -> String
84 cm x = show x <> "cm"
85 fr :: Double -> String
86 fr x = show x <> "fr"
87
88 data Token = Token
89 { tokenText :: ShortText
90 , tokenMeta :: (Char.GeneralCategory, Maybe UnicodeBlock)
91 }
92 deriving (Eq, Ord, Show)
93
94 rosettaTokenizer :: ShortText -> [Token]
95 rosettaTokenizer s = s & ShortText.unpack & group
96 where
97 group [] = []
98 group (inpHead : inpTail) = tok : group rest
99 where
100 tok =
101 Token
102 { tokenText = inpHead : txt & ShortText.pack
103 , tokenMeta
104 }
105 tokenMeta =
106 ( inpHead & Char.generalCategory
107 , inpHead & unicodeBlock
108 )
109 (txt, rest) =
110 inpTail & List.span \c ->
111 (Char.generalCategory c, unicodeBlock c) == tokenMeta
112
113 groupByHoriz :: [Token] -> [[Token]]
114 groupByHoriz = group
115 where
116 group [] = []
117 group (inpHead : inpTail) =
118 case inpHead of
119 Token{tokenMeta = (Char.Space, _)} -> group rest
120 where
121 (_skipSpaces, rest) = inpTail & List.span onSep
122 tok -> (tok : nonSeps) : group rest
123 where
124 (nonSeps, rest) = inpTail & List.break onSep
125 where
126 onSep = \case
127 Token{tokenText, tokenMeta = (Char.Space, _)}
128 | tokenText & ShortText.unpack & all (== '\xA0') -> False
129 | otherwise -> True
130 _ -> False
131 splitWords :: [Token] -> [[Token]]
132 splitWords = group
133 where
134 group :: [Token] -> [[Token]]
135 group [] = []
136 group (inpHead : inpTail) =
137 case inpHead of
138 Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> group rest
139 where
140 (_skipSpaces, rest) = inpTail & List.span onSep
141 tok -> (tok : nonSeps) : group rest
142 where
143 (nonSeps, rest) = inpTail & List.break onSep
144 where
145 onSep = \case
146 Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> True
147 _ -> False
148
149 -- | CorrectnessNote: beware than the tokenMeta is just preserved,
150 -- it does not correspond to the pronunciation unicode code points.
151 rosettaWordPonunciations :: ChineseDict -> [Token] -> [Token]
152 rosettaWordPonunciations chineseDict toks =
153 toks & List.concatMap \tok ->
154 let tokText = tok & tokenText
155 in let tokString = tokText & ShortText.unpack
156 in case tok & tokenMeta of
157 (_, Just UnicodeBlockCJK{}) -> pinyins <&> \tokenText -> tok{tokenText}
158 where
159 pinyins :: [ShortText]
160 pinyins
161 | tokString & all Char.isNumber =
162 tokString & List.concatMap \char ->
163 char & ShortText.singleton & lookupPinyins chineseDict
164 | List.length tokTextPins == ShortText.length tokText = tokTextPins
165 | otherwise = error "rosettaWordPonunciations: pinyins length mismatch"
166 tokTextPins = tokText & lookupPinyins chineseDict
167 (_, _) -> tokString <&> \_c -> tok{tokenText = ""}
168
169 rosettaWordChars :: [Token] -> [Token]
170 rosettaWordChars toks =
171 toks & List.concatMap \tok ->
172 let tokText = tok & tokenText
173 in let tokString = tokText & ShortText.unpack
174 in tokString <&> \char ->
175 tok{tokenText = char & ShortText.singleton}
176
177 {-
178 forM_ tokString \writingChar -> do
179 H.div
180 ! classes
181 [ "writing-words-cell"
182 , "writing-words-cell-space"
183 ]
184 $ do
185 ""
186 forM_ writingWord \writingToken -> do
187 let tokText = writingToken & tokenText
188 let tokString = tokText & ShortText.unpack
189 traceShowM ("writingToken", writingToken)
190 -}
191
192 rosetta :: ChineseDict -> Text -> Rosetta -> IO Builder
193 rosetta chineseDict title Rosetta{..} = do
194 -- FIXME: this absolute path is not portable out of my system
195 dataPath <- Self.getDataDir <&> File.normalise
196 return $ Blaze.renderMarkupBuilder do
197 H.docTypeHtml do
198 let (pageWidth, pageHeight) = case rosettaPageOrientation of
199 PageOrientationLandscape -> (29.7, 21.0)
200 PageOrientationPortrait -> (21.0, 29.7)
201 H.head do
202 H.title $ title & H.toHtml
203 H.link
204 ! HA.rel "stylesheet"
205 ! HA.type_ "text/css"
206 ! HA.href (toValue $ dataPath </> "styles/rosetta.css")
207 styleCSS
208 $ fromList
209 [ [".script-" <> className uniScript]
210 := [ "width" := rosettaDifficultyCharWidth & cm
211 , "height" := rosettaDifficultyCharHeight & cm
212 , "font-size" := rosettaDifficultyFontSize & cm
213 ]
214 | (uniScript, RosettaDifficulty{..}) <- rosettaDifficulties & Map.toList
215 ]
216 styleCSS
217 $ fromList
218 [ ["@media print", node]
219 := [ "width" := pageWidth & cm
220 , "height" := pageHeight & cm
221 ]
222 | node <- ["html", "body"]
223 ]
224 styleCSS
225 $ [ ["@page"]
226 := [ "size"
227 := List.unwords
228 [ case rosettaPageSize of
229 PageSizeA5 -> "A5"
230 PageSizeA4 -> "A4"
231 PageSizeA3 -> "A3"
232 , case rosettaPageOrientation of
233 PageOrientationPortrait -> "portrait"
234 PageOrientationLandscape -> "landscape"
235 ]
236 ]
237 ]
238 H.body do
239 "\n"
240 let numOfParts = rosettaParts & List.length
241 let rowGap = 0.25
242 H.div
243 ! classes
244 [ "main-page"
245 ]
246 $ do
247 H.div
248 ! classes
249 [ "rosetta"
250 , "sub-page"
251 , "page-" <> className rosettaPageSize <> "-" <> className rosettaPageOrientation
252 ]
253 ! styles
254 [ "grid-template-columns" := fr rosettaColumnPictureWidth <> " " <> fr rosettaColumnTextWidth
255 , "grid-template-rows"
256 :=
257 -- (pageHeight / fromIntegral numOfParts & (\x -> x - (fromIntegral numOfParts * rowGap)) & cm)
258 "1fr"
259 & List.replicate numOfParts
260 & List.unwords
261 , "row-gap" := rowGap & cm
262 ]
263 $ do
264 forM_ rosettaParts \RosettaPart{..} -> do
265 "\n"
266 H.div ! classes ["rosetta-cell", "rosetta-cell-picture"] $ do
267 unless (ShortText.null rosettaPartDescription) do
268 H.span ! classes ["rosetta-cell-picture-description"] $ do
269 rosettaPartDescription & H.toHtml
270 H.img ! HA.src ("file://" <> dataPath </> "images" </> rosettaPartPicture & toValue)
271 H.div ! classes ["rosetta-cell", "writing-words"] $ do
272 forM_ (rosettaPartText & rosettaTokenizer & groupByHoriz) \writingHoriz -> do
273 "\n"
274 H.div
275 ! classes
276 [ "writing-words-horiz"
277 ]
278 $ do
279 forM_ (writingHoriz & splitWords) \writingWord -> do
280 -- traceShowM ("writingWord"::String, writingWord)
281 let wordRow = writingWord & rosettaWordChars
282 H.div
283 ! classes
284 [ "writing-words-word"
285 , "lang-" <> className rosettaPartLangue
286 ]
287 ! styles
288 [ "grid-template-columns"
289 := [ width & cm
290 | Token{tokenMeta = (_, fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) -> uniScript)} <- wordRow
291 , let width = rosettaDifficulties & Map.lookup uniScript & maybe 1 rosettaDifficultyCharWidth
292 ]
293 & List.unwords
294 ]
295 $ do
296 let wordPronunciations = writingWord & rosettaWordPonunciations chineseDict
297 unless (wordPronunciations & all (tokenText >>> ShortText.null)) do
298 forM_ wordPronunciations \cellToken -> do
299 H.div
300 ! classes
301 [ "writing-words-cell"
302 , "pronunciation"
303 ]
304 $ do
305 cellToken
306 & tokenText
307 & Language.Chinese.numberedPinyinToDiacriticPiniyn
308 & ShortText.toText
309 & Text.toLower
310 & H.toHtml
311 forM_ (["model", "input"] :: [String]) \rowKind -> do
312 forM_ wordRow \cellToken -> do
313 let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
314 H.div
315 ! classes
316 [ "writing-words-cell"
317 , rowKind
318 , "script-" <> className uniScript
319 , if rowKind
320 == "input"
321 && ( rosettaDifficulties
322 & Map.lookup uniScript
323 & maybe False rosettaDifficultyHiddenPatterns
324 )
325 then "hidden"
326 else mempty
327 ]
328 $ do
329 cellToken & tokenText & H.toHtml