{-# LANGUAGE OverloadedLists #-} module Worksheets.Writing.Rosetta where import Data.ByteString.Builder (Builder) import Data.Char qualified as Char import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Text qualified as Text import Data.Text.Short qualified as ShortText import System.FilePath.Posix (()) import System.FilePath.Posix qualified as File import Text.Blaze import Text.Blaze.Html5 qualified as H import Text.Blaze.Html5.Attributes qualified as HA import Text.Blaze.Renderer.Utf8 qualified as Blaze import Prelude (error) import Language.Chinese (ChineseDict (..), ChineseDictEntries (..)) import Language.Chinese qualified import Paths_worksheets qualified as Self import Utils.Char import Utils.HTML import Utils.Prelude data Langue = LangueFrançais | LangueAnglais | LangueMandarin | LangueMandarinPinyin | LanguePhonetic deriving (Eq, Ord, Show) type FractionalRatio = Double type Centimeter = Double data RosettaDifficulty = RosettaDifficulty { rosettaDifficultyCharWidth :: Centimeter , rosettaDifficultyCharHeight :: Centimeter , rosettaDifficultyFontSize :: Centimeter , rosettaDifficultyHiddenPatterns :: Bool } deriving (Eq, Ord, Show, Generic) data RosettaPart = RosettaPart { rosettaPartPicture :: File.FilePath , rosettaPartDescription :: ShortText , rosettaPartText :: ShortText , rosettaPartLangue :: Langue } deriving (Eq, Ord, Show, Generic) data PageSize = PageSizeA5 | PageSizeA4 | PageSizeA3 deriving (Eq, Ord, Show, Generic) data PageOrientation = PageOrientationPortrait | PageOrientationLandscape deriving (Eq, Ord, Show, Generic) data Rosetta = Rosetta { rosettaParts :: [RosettaPart] , -- , rosettaClasses :: [String] rosettaColumnPictureWidth :: FractionalRatio , rosettaColumnTextWidth :: FractionalRatio , rosettaPageSize :: PageSize , rosettaPageOrientation :: PageOrientation , rosettaDifficulties :: Map UnicodeBlock RosettaDifficulty } deriving (Eq, Ord, Show, Generic) lookupPinyins :: ChineseDict -> ShortText -> [ShortText] lookupPinyins (ChineseDict dict) word = word & (`Map.lookup` dict) & fromMaybe (error $ "lookupPinyins: no entry for: " <> wordString) & chinesePinyins & (\ps -> if null ps then error $ "lookupPinyins: empty entry for: " <> wordString else ps) where wordString = word & ShortText.unpack cm :: Double -> String cm x = show x <> "cm" fr :: Double -> String fr x = show x <> "fr" data Token = Token { tokenText :: ShortText , tokenMeta :: (Char.GeneralCategory, Maybe UnicodeBlock) } deriving (Eq, Ord, Show) rosettaTokenizer :: ShortText -> [Token] rosettaTokenizer s = s & ShortText.unpack & group where group [] = [] group (inpHead : inpTail) = tok : group rest where tok = Token { tokenText = inpHead : txt & ShortText.pack , tokenMeta } tokenMeta = ( inpHead & Char.generalCategory , inpHead & unicodeBlock ) (txt, rest) = inpTail & List.span \c -> (Char.generalCategory c, unicodeBlock c) == tokenMeta groupByHoriz :: [Token] -> [[Token]] groupByHoriz = group where group [] = [] group (inpHead : inpTail) = case inpHead of Token{tokenMeta = (Char.Space, _)} -> group rest where (_skipSpaces, rest) = inpTail & List.span onSep tok -> (tok : nonSeps) : group rest where (nonSeps, rest) = inpTail & List.break onSep where onSep = \case Token{tokenText, tokenMeta = (Char.Space, _)} | tokenText & ShortText.unpack & all (== '\xA0') -> False | otherwise -> True _ -> False splitWords :: [Token] -> [[Token]] splitWords = group where group :: [Token] -> [[Token]] group [] = [] group (inpHead : inpTail) = case inpHead of Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> group rest where (_skipSpaces, rest) = inpTail & List.span onSep tok -> (tok : nonSeps) : group rest where (nonSeps, rest) = inpTail & List.break onSep where onSep = \case Token{tokenText = ShortText.unpack >>> all (== '\xA0') -> True, tokenMeta = (Char.Space, _)} -> True _ -> False -- | CorrectnessNote: beware than the tokenMeta is just preserved, -- it does not correspond to the pronunciation unicode code points. rosettaWordPonunciations :: ChineseDict -> [Token] -> [Token] rosettaWordPonunciations chineseDict toks = toks & List.concatMap \tok -> let tokText = tok & tokenText in let tokString = tokText & ShortText.unpack in case tok & tokenMeta of (_, Just UnicodeBlockCJK{}) -> pinyins <&> \tokenText -> tok{tokenText} where pinyins :: [ShortText] pinyins | tokString & all Char.isNumber = tokString & List.concatMap \char -> char & ShortText.singleton & lookupPinyins chineseDict | List.length tokTextPins == ShortText.length tokText = tokTextPins | otherwise = error "rosettaWordPonunciations: pinyins length mismatch" tokTextPins = tokText & lookupPinyins chineseDict (_, _) -> tokString <&> \_c -> tok{tokenText = ""} rosettaWordChars :: [Token] -> [Token] rosettaWordChars toks = toks & List.concatMap \tok -> let tokText = tok & tokenText in let tokString = tokText & ShortText.unpack in tokString <&> \char -> tok{tokenText = char & ShortText.singleton} {- forM_ tokString \writingChar -> do H.div ! classes [ "writing-words-cell" , "writing-words-cell-space" ] $ do "" forM_ writingWord \writingToken -> do let tokText = writingToken & tokenText let tokString = tokText & ShortText.unpack traceShowM ("writingToken", writingToken) -} rosetta :: ChineseDict -> Text -> Rosetta -> IO Builder rosetta chineseDict title Rosetta{..} = do -- FIXME: this absolute path is not portable out of my system dataPath <- Self.getDataDir <&> File.normalise return $ Blaze.renderMarkupBuilder do H.docTypeHtml do let (pageWidth, pageHeight) = case rosettaPageOrientation of PageOrientationLandscape -> (29.7, 21.0) PageOrientationPortrait -> (21.0, 29.7) H.head do H.title $ title & H.toHtml H.link ! HA.rel "stylesheet" ! HA.type_ "text/css" ! HA.href (toValue $ dataPath "styles/rosetta.css") styleCSS $ fromList [ [".script-" <> className uniScript] := [ "width" := rosettaDifficultyCharWidth & cm , "height" := rosettaDifficultyCharHeight & cm , "font-size" := rosettaDifficultyFontSize & cm ] | (uniScript, RosettaDifficulty{..}) <- rosettaDifficulties & Map.toList ] styleCSS $ fromList [ ["@media print", node] := [ "width" := pageWidth & cm , "height" := pageHeight & cm ] | node <- ["html", "body"] ] styleCSS $ [ ["@page"] := [ "size" := List.unwords [ case rosettaPageSize of PageSizeA5 -> "A5" PageSizeA4 -> "A4" PageSizeA3 -> "A3" , case rosettaPageOrientation of PageOrientationPortrait -> "portrait" PageOrientationLandscape -> "landscape" ] ] ] H.body do "\n" let numOfParts = rosettaParts & List.length let rowGap = 0.25 H.div ! classes [ "main-page" ] $ do H.div ! classes [ "rosetta" , "sub-page" , "page-" <> className rosettaPageSize <> "-" <> className rosettaPageOrientation ] ! styles [ "grid-template-columns" := fr rosettaColumnPictureWidth <> " " <> fr rosettaColumnTextWidth , "grid-template-rows" := -- (pageHeight / fromIntegral numOfParts & (\x -> x - (fromIntegral numOfParts * rowGap)) & cm) "1fr" & List.replicate numOfParts & List.unwords , "row-gap" := rowGap & cm ] $ do forM_ rosettaParts \RosettaPart{..} -> do "\n" H.div ! classes ["rosetta-cell", "rosetta-cell-picture"] $ do unless (ShortText.null rosettaPartDescription) do H.span ! classes ["rosetta-cell-picture-description"] $ do rosettaPartDescription & H.toHtml H.img ! HA.src ("file://" <> dataPath "images" rosettaPartPicture & toValue) H.div ! classes ["rosetta-cell", "writing-words"] $ do forM_ (rosettaPartText & rosettaTokenizer & groupByHoriz) \writingHoriz -> do "\n" H.div ! classes [ "writing-words-horiz" ] $ do forM_ (writingHoriz & splitWords) \writingWord -> do -- traceShowM ("writingWord"::String, writingWord) let wordRow = writingWord & rosettaWordChars H.div ! classes [ "writing-words-word" , "lang-" <> className rosettaPartLangue ] ! styles [ "grid-template-columns" := [ width & cm | Token{tokenMeta = (_, fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) -> uniScript)} <- wordRow , let width = rosettaDifficulties & Map.lookup uniScript & maybe 1 rosettaDifficultyCharWidth ] & List.unwords ] $ do let wordPronunciations = writingWord & rosettaWordPonunciations chineseDict unless (wordPronunciations & all (tokenText >>> ShortText.null)) do forM_ wordPronunciations \cellToken -> do H.div ! classes [ "writing-words-cell" , "pronunciation" ] $ do cellToken & tokenText & Language.Chinese.numberedPinyinToDiacriticPiniyn & ShortText.toText & Text.toLower & H.toHtml forM_ (["model", "input"] :: [String]) \rowKind -> do forM_ wordRow \cellToken -> do let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic) H.div ! classes [ "writing-words-cell" , rowKind , "script-" <> className uniScript , if rowKind == "input" && ( rosettaDifficulties & Map.lookup uniScript & maybe False rosettaDifficultyHiddenPatterns ) then "hidden" else mempty ] $ do cellToken & tokenText & H.toHtml