{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE ImportQualifiedPost #-} module Language.PronunciationSpec where import Data.ByteString.Builder qualified as ByteString.Builder import Data.GenValidity.Map () import Data.GenValidity.Sequence () import Data.GenValidity.Set () import Data.GenValidity.Text () import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Set qualified as Set import Data.Text qualified as Text import Data.Text.Encoding qualified as Text import Data.Text.Lazy.Encoding qualified as Text.Lazy import Data.Text.Short qualified as ShortText import Data.Validity.Map () import Data.Validity.Set () import Data.Validity.Text () import Language import Language.French import Language.French qualified as French import Language.English import Language.English qualified as English import Language.Pronunciation qualified as Pron import Paths_worksheets qualified as Self import System.Directory qualified as IO import System.FilePath (joinPath, pathSeparator, (<.>), ()) import System.FilePath.Posix qualified as File import System.IO qualified as IO import Test.Syd import Text.Blaze.Html5.Attributes qualified as HA import Wiktionary qualified import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!)) import Worksheets.Utils.HTML qualified as HTML import Worksheets.Utils.IPA qualified as IPA import Worksheets.Utils.Paper qualified as Paper import Worksheets.Utils.Prelude import Worksheets.Utils.SQL qualified as SQL import Prelude (error, even, mod) import Utils.Tests cardsHtml :: String -> IO Html -> TestDefM (outers) () () cardsHtml title html = do outPath <- goldenPath title "html" builder <- html <&> HTML.renderMarkupBuilder & liftIO it title do goldenByteStringBuilderFile outPath (return builder) pronunciationDictTest dict = do let title = "lexer" outPath <- goldenPath title "html" builder <- dict & Pron.tableHtml <&> HTML.renderMarkupBuilder & liftIO it title do goldenByteStringBuilderFile outPath (return builder) pronunciationDecompositionTest :: String -> TestDefM (outers) () () pronunciationDecompositionTest sentence = do outPath <- goldenPath sentence "txt" it sentence do pureGoldenByteStringBuilderFile outPath $ case sentence & Text.pack & Pron.lexerPron French.pronunciationDict of Right x -> x & foldMap \(key, lex) -> key <> " → " <> lex <> "\n" & Text.encodeUtf8 & ByteString.Builder.byteString err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString spec :: HasCallStack => Spec spec = do describe "English" do pronunciationDictTest $ English.pronunciationDict describe "French" do pronunciationDictTest $ French.pronunciationDict describe "textToLexeme" do forM_ sentences \sentence -> pronunciationDecompositionTest $ sentence & Text.unpack withoutRetries do withoutTimeout do around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do outDB <- goldenPath "wiktionary" "sqlite" outDBExists <- IO.doesFileExist outDB & liftIO when (outDBExists) do IO.removeFile outDB & liftIO aroundWith ( \k wiktConn -> do SQL.withConnection outDB \outConn -> k (wiktConn, outConn) ) do it "check-against-wiktionary" \(wiktConn, outConn) -> do SQL.execute_ outConn $ "PRAGMA journal_mode = OFF" SQL.execute_ outConn $ "PRAGMA synchronous = OFF" SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)" SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);" SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);" SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);" SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)" SQL.fold wiktConn "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000" () (1 :: Int) \ !i ( exprId :: Int , maybeWord :: Maybe Text , sounds :: [Wiktionary.Sound] ) -> do case maybeWord of Nothing -> return () Just exprLit -> case exprLit & Pron.lexerPron French.pronunciationDict of Left err -> do SQL.execute outConn "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)" ( exprId , exprLit , err & pShowNoColor ) Right lexemes -> do let synthExplain = lexemes <&> (\(key, lex) -> key <> " → " <> lex) & Text.intercalate "; " let synthBroad = lexemes & foldMap snd forM_ sounds \Wiktionary.Sound{..} -> do -- IO.hPrint IO.stderr (i, ident::Int, maybeWord) case sound_ipa of Just (IPA.IPAPhons exprIPAs) -> do forM_ exprIPAs \case IPA.IPAPhonemic exprBroad -> do when (i `mod` 5000 == 0) do IO.hPrint IO.stderr (i, exprLit, exprBroad) SQL.execute outConn "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);" ( exprLit , exprBroad & IPA.ipaWordsToText Nothing , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing , synthBroad , synthExplain ) -- & traceShowId IPA.IPAPhonError (_errMsg, _err) -> return () _ -> return () _ -> return () return (i + 1) return @IO () {- mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]] mangleSupraSegmentalFeatures = \case [] -> [] IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts IPA.WithSuprasegmentalFeature IPA.Linking syl : ts -> case mangleSupraSegmentalFeatures ts of [] -> [dropSupraSegmentalFeatures syl] x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs IPA.WithSuprasegmentalFeature _feat syl : xs -> mangleSupraSegmentalFeatures (syl : xs) -} -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML sentences :: [Text] sentences = ( [ "ampoules" , "papillon" , "aiguille" , "aimer" , "ail" ] <> ) $ French.pronunciationDict & Pron.lexerDictMap & Map.elems <&> \lex -> lex & Pron.lexemeExample & foldMap (Pron.literalText >>> ShortText.toText)