{-# LANGUAGE OverloadedLists #-} module Worksheets.Writing.Rosetta where import Data.ByteString.Builder (Builder) import Data.Char qualified as Char import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Text qualified as Text import Data.Text.Short qualified as ShortText import System.FilePath.Posix (()) import System.FilePath.Posix qualified as File import Text.Blaze import Text.Blaze.Html5 qualified as H import Text.Blaze.Html5.Attributes qualified as HA import Text.Blaze.Renderer.Utf8 qualified as Blaze import Prelude (error) import Language.Chinese (ChineseDict (..), ChineseDictEntries (..)) import Language.Chinese qualified import Paths_worksheets qualified as Self import Utils.Char as Char import Utils.HTML import Utils.Prelude data Langue = LangueFrançais | LangueAnglais | LangueMandarin | LangueMandarinPinyin | LanguePhonetic deriving (Eq, Ord, Show) instance HasTypeDefault Langue where typeDefault = LangueAnglais data Length = LengthFractionalRatio Natural | LengthMillimeters Double deriving (Eq, Show) instance ToCSS Length where toCSS = \case LengthFractionalRatio x -> show x <> "fr" LengthMillimeters x -> show x <> "mm" cm :: Double -> Length cm = LengthMillimeters . (* 10) mm :: Double -> Length mm = LengthMillimeters fr :: Natural -> Length fr = LengthFractionalRatio data RosettaDifficulty = RosettaDifficulty { rosettaDifficultyCharWidth :: Length , rosettaDifficultyCharHeight :: Length , rosettaDifficultyWordSpacing :: Length , rosettaDifficultyFontSize :: Length , rosettaDifficultyHiddenPatterns :: Bool } deriving (Eq, Show, Generic) instance HasTypeDefault RosettaDifficulty where typeDefault = RosettaDifficulty { rosettaDifficultyCharWidth = 1 & cm , rosettaDifficultyCharHeight = 1 & cm , rosettaDifficultyWordSpacing = 1 & cm , rosettaDifficultyFontSize = 1 & cm , rosettaDifficultyHiddenPatterns = False } cssBlockObjectFitCover :: CSSBlock cssBlockObjectFitCover = ["object-fit" := "cover"] data RosettaPart = RosettaPart { rosettaPartPicture :: File.FilePath , rosettaPartPictureCSS :: CSSBlock , rosettaPartDescription :: ShortText , rosettaPartText :: ShortText , rosettaPartLangue :: Langue } deriving (Eq, Ord, Show, Generic) deriving (HasTypeDefault) via (Generically RosettaPart) data PageSize = PageSizeA5 | PageSizeA4 | PageSizeA3 deriving (Eq, Ord, Show, Generic) instance HasTypeDefault PageSize where typeDefault = PageSizeA4 data PageOrientation = PageOrientationPortrait | PageOrientationLandscape deriving (Eq, Ord, Show, Generic) instance HasTypeDefault PageOrientation where typeDefault = PageOrientationLandscape data Rosetta = Rosetta { rosettaParts :: [RosettaPart] , rosettaColumnPictureWidth :: Length , rosettaColumnTextWidth :: Length , rosettaPageSize :: PageSize , rosettaPageOrientation :: PageOrientation , rosettaDifficulties :: RosettaDifficulties } deriving (Eq, Show, Generic) instance HasTypeDefault Rosetta where typeDefault = Rosetta { rosettaParts = [] , rosettaColumnPictureWidth = 1 & fr , rosettaColumnTextWidth = 1 & fr , rosettaPageSize = typeDefault , rosettaPageOrientation = typeDefault , rosettaDifficulties = typeDefault } rosettaPortrait m = m { rosettaPageOrientation = PageOrientationPortrait , rosettaColumnPictureWidth = 3 & fr , rosettaColumnTextWidth = 4 & fr } rosettaLandscape m = m { rosettaPageOrientation = PageOrientationLandscape , rosettaColumnPictureWidth = 1 & fr , rosettaColumnTextWidth = 2 & fr } type RosettaDifficulties = Map UnicodeBlock RosettaDifficulty rosettaDifficultiesLatinBig = mapInsertManyWithTypeDefault Char.unicodeBlockLatin \v -> v { rosettaDifficultyCharWidth = 1 & cm , rosettaDifficultyCharHeight = 1 & cm , rosettaDifficultyWordSpacing = 0.5 & cm , rosettaDifficultyFontSize = 0.90 & cm } rosettaDifficultiesCJKBig :: Modifier RosettaDifficulties rosettaDifficultiesCJKBig = mapInsertManyWithTypeDefault unicodeBlockCJK \v -> v { rosettaDifficultyCharWidth = 1.50 & cm , rosettaDifficultyCharHeight = 1.50 & cm , rosettaDifficultyWordSpacing = 0.5 & cm , rosettaDifficultyFontSize = 1.40 & cm } rosettaDifficultiesLatinHidden :: Modifier RosettaDifficulties rosettaDifficultiesLatinHidden = mapInsertManyWithTypeDefault unicodeBlockLatin \v -> v{rosettaDifficultyHiddenPatterns = True} rosettaDifficultiesCJKHidden :: Modifier RosettaDifficulties rosettaDifficultiesCJKHidden = mapInsertManyWithTypeDefault unicodeBlockCJK \v -> v{rosettaDifficultyHiddenPatterns = True} lookupPinyins :: ChineseDict -> ShortText -> [ShortText] lookupPinyins (ChineseDict dict) word = word & (`Map.lookup` dict) & fromMaybe (error $ "lookupPinyins: no entry for: " <> wordString) & chinesePinyins & (\ps -> if null ps then error $ "lookupPinyins: empty entry for: " <> wordString else ps) where wordString = word & ShortText.unpack data Token = Token { tokenText :: ShortText , tokenMeta :: (Char.GeneralCategory, Maybe UnicodeBlock) } deriving (Eq, Ord, Show) rosettaTokenizer :: ShortText -> [Token] rosettaTokenizer s = s & ShortText.unpack & group where group [] = [] group (inpHead : inpTail) = tok : group rest where tok = Token { tokenText = inpHead : txt & ShortText.pack , tokenMeta } tokenMeta = ( inpHead & Char.generalCategory , inpHead & unicodeBlock ) (txt, rest) = inpTail & List.span \c -> (Char.generalCategory c, unicodeBlock c) == tokenMeta groupByHoriz :: [Token] -> [[Token]] groupByHoriz = group where group [] = [] group (inpHead : inpTail) = case inpHead of Token{tokenMeta = (Char.Space, _)} -> group rest where (_skipSpaces, rest) = inpTail & List.span onSep tok -> (tok : nonSeps) : group rest where (nonSeps, rest) = inpTail & List.break onSep where onSep = \case Token{tokenText, tokenMeta = (Char.Space, _)} | tokenText & ShortText.unpack & all (== '\xA0') -> False | otherwise -> True _ -> False splitWords :: [Token] -> [[Token]] splitWords = group where group :: [Token] -> [[Token]] group [] = [] group (inpHead : inpTail) = case inpHead of Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> group rest where (_skipSpaces, rest) = inpTail & List.span onSep tok -> (tok : nonSeps) : group rest where (nonSeps, rest) = inpTail & List.break onSep where onSep = \case Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> True _ -> False -- | CorrectnessNote: beware than the tokenMeta is just preserved, -- it does not correspond to the pronunciation unicode code points. rosettaWordPonunciations :: ChineseDict -> [Token] -> [Token] rosettaWordPonunciations chineseDict toks = toks & List.concatMap \tok -> let tokText = tok & tokenText in let tokString = tokText & ShortText.unpack in case tok & tokenMeta of (_, Just UnicodeBlockCJK{}) -> pinyins <&> \tokenText -> tok{tokenText} where pinyins :: [ShortText] pinyins | tokString & all Char.isNumber = tokString & List.concatMap \char -> char & ShortText.singleton & lookupPinyins chineseDict | List.length tokTextPins == ShortText.length tokText = tokTextPins | otherwise = error "rosettaWordPonunciations: pinyins length mismatch" tokTextPins = tokText & lookupPinyins chineseDict (_, _) -> tokString <&> \_c -> tok{tokenText = ""} rosettaWordChars :: [Token] -> [Token] rosettaWordChars toks = toks & List.concatMap \tok -> let tokText = tok & tokenText in let tokString = tokText & ShortText.unpack in tokString <&> \char -> tok{tokenText = char & ShortText.singleton} rosettaHTML :: ChineseDict -> Text -> Rosetta -> IO Builder rosettaHTML chineseDict title Rosetta{..} = do -- FIXME: this absolute path is not portable out of my system dataPath <- Self.getDataDir <&> File.normalise return $ Blaze.renderMarkupBuilder do H.docTypeHtml do let (pageWidth, pageHeight) = case rosettaPageOrientation of PageOrientationLandscape -> (29.7 & cm, 21.0 & cm) PageOrientationPortrait -> (21.0 & cm, 29.7 & cm) H.head do H.title $ title & H.toHtml H.link ! HA.rel "stylesheet" ! HA.type_ "text/css" ! HA.href (dataPath "styles/rosetta.css" & toValue) -- rosettaDifficulties styleCSS $ fromList [ [ [".script-" <> className uniScript] := [ "width" := rosettaDifficultyCharWidth & toCSS , "height" := rosettaDifficultyCharHeight & toCSS , "font-size" := rosettaDifficultyFontSize & toCSS ] , [".writing-words"] := ["column-gap" := rosettaDifficultyWordSpacing & toCSS] , [".writing-words-horiz"] := ["column-gap" := rosettaDifficultyWordSpacing & toCSS] ] | (uniScript, RosettaDifficulty{..}) <- rosettaDifficulties & toList ] & mconcat -- print styleCSS $ fromList [ ["@media print", node] := [ "width" := pageWidth & toCSS , "height" := pageHeight & toCSS ] | node <- ["html", "body"] ] styleCSS [ ["@page"] := [ "size" := List.unwords [ case rosettaPageSize of PageSizeA5 -> "A5" PageSizeA4 -> "A4" PageSizeA3 -> "A3" , case rosettaPageOrientation of PageOrientationPortrait -> "portrait" PageOrientationLandscape -> "landscape" ] ] ] H.body do "\n" let numOfParts = rosettaParts & List.length H.div ! classes ["main-page"] $ do H.div ! classes [ "rosetta" , "sub-page" , "page-" <> className rosettaPageSize <> "-" <> className rosettaPageOrientation ] ! styles [ "grid-template-columns" := [rosettaColumnPictureWidth, rosettaColumnTextWidth] <&> toCSS & List.unwords , "grid-template-rows" := "1fr" & List.replicate numOfParts & List.unwords ] $ do forM_ rosettaParts \RosettaPart{..} -> do "\n" H.div ! classes ["rosetta-cell", "rosetta-cell-picture"] $ do unless (rosettaPartDescription & ShortText.null) do H.span ! classes ["rosetta-cell-picture-description"] $ do rosettaPartDescription & H.toHtml H.img ! styles rosettaPartPictureCSS ! HA.src ("file://" <> dataPath "images" rosettaPartPicture & toValue) H.div ! classes ["rosetta-cell", "writing-words"] $ do forM_ (rosettaPartText & rosettaTokenizer & groupByHoriz) \writingHoriz -> do "\n" H.div ! classes [ "writing-words-horiz" ] $ do forM_ (writingHoriz & splitWords) \writingWord -> do -- traceShowM ("writingWord"::String, writingWord) let wordRow = writingWord & rosettaWordChars H.div ! classes [ "writing-words-word" , "lang-" <> className rosettaPartLangue ] ! styles [ "grid-template-columns" := [ width & toCSS | Token{tokenMeta = (_, fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) -> uniScript)} <- wordRow , let width = rosettaDifficulties & lookupOrTypeDefault uniScript & rosettaDifficultyCharWidth ] & List.unwords ] $ do let wordPronunciations = writingWord & rosettaWordPonunciations chineseDict unless (wordPronunciations & all (tokenText >>> ShortText.null)) do forM_ wordPronunciations \cellToken -> do H.div ! classes [ "writing-words-cell" , "pronunciation" ] $ do cellToken & tokenText & Language.Chinese.numberedPinyinToDiacriticPiniyn & ShortText.toText & Text.toLower & H.toHtml forM_ (["model", "input"] :: [String]) \rowKind -> do forM_ wordRow \cellToken -> do let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) H.div ! classes [ "writing-words-cell" , rowKind , "script-" <> className uniScript , if rowKind == "input" && ( rosettaDifficulties & lookupOrTypeDefault uniScript & rosettaDifficultyHiddenPatterns ) then "hidden" else mempty ] $ do cellToken & tokenText & H.toHtml