{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} module Language.FrenchSpec where import Data.ByteString.Builder qualified as ByteString.Builder import Data.GenValidity.Map () import Data.GenValidity.Sequence () import Data.GenValidity.Set () import Data.GenValidity.Text () import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Set qualified as Set import Data.Text qualified as Text import Data.Text.Encoding qualified as Text import Data.Text.Lazy.Encoding qualified as Text.Lazy import Data.Text.Short qualified as ShortText import Data.Validity.Map () import Data.Validity.Set () import Data.Validity.Text () import Language import Language.French qualified as French import Language.Pronunciation (ExampleLiteral (..), Lexeme (..), Rule (..), RuleLexemes, after, before, begining, ending, meaning, occurence, rule, silent, word) import Language.Pronunciation qualified as Pron import Paths_worksheets qualified as Self import System.Directory qualified as IO import System.FilePath (joinPath, pathSeparator, (<.>), ()) import System.FilePath.Posix qualified as File import System.IO qualified as IO import Test.Syd import Text.Blaze.Html5.Attributes qualified as HA import Utils.Pronunciation qualified as Pron import Utils.Tests import Wiktionary qualified import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!)) import Worksheets.Utils.HTML qualified as HTML import Worksheets.Utils.IPA qualified as IPA import Worksheets.Utils.Paper qualified as Paper import Worksheets.Utils.Prelude import Worksheets.Utils.SQL qualified as SQL import Prelude (error, even, mod) {- instance IsString Rule where fromString s = rule {ruleLexemes = fromString s} [ ful "de" := single "də" ["de" & occurence] , inf "'" := single "" [] , inf "-" := single "" [] , inf "," := single "" [] , inf "?" := single "" [] , inf "!" := single "" [] , inf borderLeftText := single "" [] , inf borderRightText := single "" [] , inf "demi" := single "dəmi" ["demi" & occurence] , -- , inf "amb" := single "ɑ̃b" ["amb" & occurence, "ulance"] -- , inf "amp" := single "ɑ̃p" ["amp" & occurence, "oule"] -- , inf "amph" := single "ɑ̃f" ["amph" & occurence, "ithéâtre"] -- , inf "an" := multi -- [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["pl", "an" & occurence]} -- , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation="an", lexemeExample=["an" & occurence, "imal"]} -- , [PronContextBeforeAny] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["an" & occurence, "tilope"]} -- ] , -- , inf "coin" := single "kwɛ̃" ["coin" & occurence, "cer"] , -- , inf "im" -- := multi -- [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation = "", lexemeExample = } -- , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "im", lexemeExample = ["im" & occurence, "ortel"]} -- , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛ̃", lexemeExample = ["im" & occurence, "primer"]} -- ] , -- , inf "in" := single "ɛ̃" ["merl", "in" & occurence] -- , inf "ina" := single "ina" ["ord", "ina" & occurence, "teur"] -- , inf "ine" := single "in" ["rout", "ine" & occurence] -- , inf "inent" := single "in" ["dess", "inent" & occurence] -- , inf "iner" := single "inɛʁ" ["iner" & occurence, "tie"] -- , inf "inex" := single "inɛgz" ["inex" & occurence, "istant"] -- , inf "inexp" := single "inɛksp" ["inexp" & occurence, "licable"] -- , inf "inima" := single "inima" ["inima" & occurence, "ginable"] -- , inf "inimi" := single "inimi" ["inimi" & occurence, "table"] -- , inf "inimp" := single "inimp" ["inimp" & occurence, "ortant"] -- , inf "ininf" := single "inɛ̃" ["ininf" & occurence, "lammation"] -- , inf "inint" := single "inɛ̃t" ["inint" & occurence, "éressant"] -- , inf "inn" := single "in" ["inn" & occurence, "ocent"] -- , inf "ino" := single "ino" ["ino" & occurence, "dore"] -- , inf "inu" := single "inu" ["inu" & occurence, "tile"] -- , inf "ix" := single "iks" ["phén", "ix" & occurence] -- , inf "gine" := single "ʒin" ["an", "gine" & occurence] -- , inf "era" := single "əʁa" ["s", "era" & occurence] -- , inf "erai" := single "əʁɛ" ["s", "erai" & occurence] , inf "era" := multi [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "əʁ", lexemeExample = ["mer" & occurence, "ingue"]} , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛʁ", lexemeExample = ["mer" & occurence, "le"]} ] , inf "erin" := single "əʁɛ̃" ["m", "erin" & occurence, "gue"] , inf "s" := multi [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["ti", "s" & occurence, "ane"]} , [PronContextBeforeAnySemiVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["pari", "s" & occurence, "ienne"]} , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "s", lexemeExample = ["s", "a" & occurence]} ] , -- , inf "shirt" := single "ʃœʁt" ["shirt" & occurence] , -- , inf "teau" := single "to" ["teau" & occurence] -- , inf "ti" := single "ti" ["ti" & occurence, "gre"] , pre "ukulele" := single "jukulele" ["ukulele" & occurence] , -- , inf "ui" := single "ɥi" ["ling", "ui" & occurence, "stique"] , pre "abrivent" := single "abʁivɑ̃" ["abrivent" & occurence] , pre "adjuvent" := single "adʒyvɑ̃" ["adjuvent" & occurence] , pre "antivent" := single "ɑ̃tivɑ̃" ["antivent" & occurence] , pre "auvent" := single "ovɑ̃" ["auvent" & occurence] , pre "avent" := single "avɑ̃" ["avent" & occurence] , pre "aï" := single "ai" ["c", "aï" & occurence, "man"] , pre "bavent" := single "bavɑ̃" ["bavent" & occurence] , pre "boutavent" := single "butavɑ̃" ["boutavent" & occurence] , pre "bouvent" := single "buvɑ̃" ["bouvent" & occurence] , pre "bénévent" := single "benevɑ̃" ["bénévent" & occurence] , pre "connivent" := single "konivɑ̃" ["connivenr" & occurence] , pre "contrevent" := single "kɔ̃tʁəvɑ̃" ["contrevent" & occurence] , pre "convent" := single "kɔ̃vɑ̃" ["convent" & occurence] , pre "couvent" := single "kuvɑ̃" ["couvent" & occurence] , pre "couvents" := single "kuvɑ̃" ["couvents" & occurence] , pre "engoulevent" := single "ɑ̃gulvɑ̃" ["engoulevent" & occurence] , pre "fervent" := single "fɛʁvɑ̃" ["fervent" & occurence] , pre "frévent" := single "fʁevɑ̃" ["frévent" & occurence] , pre "heurtevent" := single "œʁtəvɑ̃" ["heurtevent" & occurence] , pre "her" := single "ɛʁ" ["her" & occurence, "be"] , pre "hi" := single "i" ["hi" & occurence, "ver"] , pre "ill" := single "il" ["ill" & occurence, "étrisme"] , pre "jack" := single "dʒak" ["jack" & occurence] , pre "jazz" := single "dʒaz" ["jazz" & occurence] , pre "montsurvent" := single "mɔ̃syʁvɑ̃" ["montsurvent" & occurence] , -- , pre "ni" := single "ni" ["ni" & occurence, "ais"] pre "niai" := single "njɛ" ["niai" & occurence, "s"] , pre "paravent" := single "paʁavɑ̃" ["paravent" & occurence] , pre "sen" := multi [ [PronContextBeforeAny] := LexemePron{lexemePronunciation = "sɑ̃", lexemeExample = ["sen" & occurence, "t"]} , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "sen", lexemeExample = ["sen" & occurence, "a"]} -- , inf "a" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "a"]} -- , inf "e" := LexemePron{lexemePronunciation="sən", lexemeExample=["sen" & occurence, "estre"]} -- , inf "i" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "ior"]} -- , inf "o" := LexemePron{lexemePronunciation="sen", lexemeExample=["Sen" & occurence, "oueix"]} -- , inf "u" := LexemePron{lexemePronunciation="sen", lexemeExample=[]} ] , pre "souvent" := single "suvɑ̃" ["souvent" & occurence] , pre "taillevent" := single "tɑjvɑ̃" ["taillevent" & occurence] , pre "tournevent" := single "tuʁnəvɑ̃" ["tournevent" & occurence] , pre "vent" := single "vɑ̃" ["vent" & occurence] , pre "virevent" := single "viʁvɑ̃" ["virevent" & occurence] , pre "volvent" := single "vɔlvɑ̃" ["volvent" & occurence] , pre "évent" := single "evɑ̃" ["évent" & occurence] , pre "œ" := single "œ" ["œ" & occurence, "uf"] , pre "fier" := single "fjɛʁ" ["fier" & occurence] , pre "tier" := single "tjɛʁ" ["tier" & occurence] ] & list & foldMap (\(ks, v) -> [(k, v) | k <- ks]) & mapFromListCheckingDuplicates where single pron exs = Map.singleton PronContextBeforeAny $ LexemePron{lexemePronunciation = pron, lexemeExample = exs} multi l = l & list & foldMap (\(ks, v) -> [(k, v) | k <- ks]) & mapFromListCheckingDuplicates -} spec :: HasCallStack => Spec spec = do -- Pron.lexerRender $ French.pronunciationDict describe "pronunciation" do -- describe "decomposition" do -- forM_ sentences \sentence -> -- Pron.pronunciationDecompositionTest French.pronunciationDict $ sentence & Text.unpack -- describe "result" do -- forM_ sentences \sentence -> -- Pron.pronunciationResultTest French.pronunciationDict $ sentence & Text.unpack describe "parser" do forM_ sentences \sentence -> Pron.pronunciationParserTest French.pronunciationRules sentence {- withoutRetries do withoutTimeout do around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do outDB <- goldenPath "wiktionary" "sqlite" outDBExists <- IO.doesFileExist outDB & liftIO when (outDBExists) do IO.removeFile outDB & liftIO aroundWith ( \k wiktConn -> do SQL.withConnection outDB \outConn -> k (wiktConn, outConn) ) do it "check-against-wiktionary" \(wiktConn, outConn) -> do SQL.execute_ outConn $ "PRAGMA journal_mode = OFF" SQL.execute_ outConn $ "PRAGMA synchronous = OFF" SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)" SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);" SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);" SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);" SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)" SQL.fold wiktConn "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000" () (1 :: Int) \ !i ( exprId :: Int , maybeWord :: Maybe Text , sounds :: [Wiktionary.Sound] ) -> do case maybeWord of Nothing -> return () Just exprLit -> case exprLit & Pron.run French.pronunciationRules of Left err -> do SQL.execute outConn "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)" ( exprId , exprLit , err & pShowNoColor ) Right lexemes -> do let synthExplain = lexemes <&> (\Pron.Pron{pronInput, pronRule} -> Pron.chars pronInput <> " → " <> show (Pron.rulePron pronRule)) & List.intercalate "; " let synthBroad = lexemes & foldMap (Pron.pronRule >>> Pron.rulePron >>> Pron.pronunciationText) forM_ sounds \Wiktionary.Sound{..} -> do -- IO.hPrint IO.stderr (i, ident::Int, maybeWord) case sound_ipa of Just (IPA.IPAPhons exprIPAs) -> do forM_ exprIPAs \case IPA.IPAPhonemic exprBroad -> do when (i `mod` 5000 == 0) do IO.hPrint IO.stderr (i, exprLit, exprBroad) SQL.execute outConn "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);" ( exprLit , exprBroad & IPA.ipaWordsToText Nothing , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing , synthBroad , synthExplain ) -- & traceShowId IPA.IPAPhonError (_errMsg, _err) -> return () _ -> return () _ -> return () return (i + 1) return @IO () -} {- mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]] mangleSupraSegmentalFeatures = \case [] -> [] IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts IPA.WithSuprasegmentalFeature IPA.Linking syl : ts -> case mangleSupraSegmentalFeatures ts of [] -> [dropSupraSegmentalFeatures syl] x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs IPA.WithSuprasegmentalFeature _feat syl : xs -> mangleSupraSegmentalFeatures (syl : xs) -} -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML sentences :: Set [ExampleLiteral] sentences = [ [ ["aiguille"] , ["ail"] , ["aimer"] , ["ampoules"] , ["couvent"{exampleLiteralMeaning = "couver"}] , ["couvent"{exampleLiteralMeaning = "monastère"}] , ["papillon"] ] , French.pronunciationRules & Pron.examples ] & mconcat