From cdb73cf270dcd3f6a567797d214e49dad42f8f2c Mon Sep 17 00:00:00 2001 From: Julien Moutinho Date: Sat, 18 Oct 2025 01:34:11 +0200 Subject: [PATCH] update --- flake.nix | 1 + src/Language/Pronunciation.hs | 135 +++------------------------------- tests/Language/EnglishSpec.hs | 3 + tests/Language/FrenchSpec.hs | 8 ++ tests/Spec.hs | 6 +- 5 files changed, 25 insertions(+), 128 deletions(-) diff --git a/flake.nix b/flake.nix index 278cbef..f51f987 100644 --- a/flake.nix +++ b/flake.nix @@ -99,6 +99,7 @@ pkgs.reuse pkgs.xdot pkgs.gnuplot + pkgs.fswatch ] ++ inputs.self.checks.${system}.git-hooks-check.enabledPackages; withHoogle = false; diff --git a/src/Language/Pronunciation.hs b/src/Language/Pronunciation.hs index 28c09c7..f61b2a2 100644 --- a/src/Language/Pronunciation.hs +++ b/src/Language/Pronunciation.hs @@ -605,101 +605,11 @@ words prons = word0 : words next Left c | c & Char.isSpace -> True _ -> False -data Input = Input - { inputText :: Text - , inputPhonetic :: [IPA.Syllable []] - , inputMeaning :: Maybe ShortText - } - -patterns :: Map PatKey PatNode -patterns = - [ PatKeyNext (PatContextChar 'a') := - [ PatKeyNext PatContextLexicalBorder := - PatEnd ["a" := "ə"] - ] - , PatKeyNext (PatContextChar 't') := - [ PatKeyNext (PatContextChar 'h') := - [ PatKeyNext (PatContextChar 'e') := - [ PatKeyNext (PatContextLexicalCategory Char.Space) := - PatEnd ["the" := "zi"] - ] - ] - ] - ] - -data State = State - { stateInput :: LZ.Zipper Inp - , stateBuffer :: [PatKey] - , statePats :: Map PatKey PatNode - , statePatReset :: Bool - } - -parse :: Map PatKey PatNode -> Text -> [Inp] -parse initPats input = - loop - State - { stateInput = input & Text.unpack & fmap charToInp & LZ.fromList - , stateBuffer = [] - , statePats = initPats - , statePatReset = True - } - & stateInput - & LZ.toList - where - charToInp :: Char -> Inp - charToInp c = - Inp - { inpPats = [PatKeyNext (PatContextChar c)] - , inpPronunciations = [] - } - loop :: State -> State - loop st = - [ look (key, val) st - | (key, val) <- st & statePats & Map.toList - ] - & catMaybes - & headMaybe - & fromMaybe (loop st{statePats = initPats}) - look :: (PatKey, PatNode) -> State -> Maybe State - look kv@(key, val) st = - case key of - PatKeyPrev patPrev -> errorShow ("prev" :: Text) - PatKeyNext patNext -> - case patNext of - PatContextLexicalBorder - | stateInput st & LZ.endp -> match kv st - -- PatContextChar c - -- | Just inpNext <- stateInput st & LZ.safeCursor -> - -- inpNext & inpPats & - -- case of - -- Nothing -> - _ -> Nothing - match kv@(key, val) st = - case val of - PatEnd pron -> - Just - st - { statePats = initPats - , stateBuffer = [] - , stateInput = - stateInput st - & LZ.insert - Inp - { inpPats = stateBuffer st & (key :) & List.reverse - , inpPronunciations = pron - } - } - PatNode pats -> - Just - st - { statePats = pats - , stateBuffer = key : stateBuffer st - } {- case statePats st & Map.lookup k of Nothing -> st - Just (PatNode pats) -> + Just (PatTree pats) -> loop st {statePats = pats, stateBuffer = k : stateBuffer st} Just (PatEnd end) -> loop st { statePats = initPats @@ -714,17 +624,17 @@ case statePats st & Map.lookup k of -} {- -parse :: PatNode -> Text -> [Inp] +parse :: PatTree -> Text -> [Inp] parse initPats input = let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in runInp [] initPats inpZip & LZ.toList where charToInp :: Char -> Inp charToInp c = Inp - { inpPats = [PatKeyNext (PatContextChar c)] + { inpPats = [PosNext (PatternChar c)] , inpPronunciations = [] } - runInp :: [PatKey] -> PatNode -> LZ.Zipper Inp -> LZ.Zipper Inp + runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp runInp oks pat inp = traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $ case pat of @@ -737,17 +647,17 @@ parse initPats input = } & LZ.right & runInp [] initPats - PatNode pats -> + PatTree pats -> -- the pattern may go on case inp & LZ.safeCursor of Nothing -> inp - & runPat [] oks [PatKeyNext PatContextLexicalBorder] pats + & runPat [] oks [PosNext PatternLexicalBorder] pats Just cur -> inp & LZ.delete & runPat [] oks (inpPats cur & List.sort) pats - runPat :: [PatKey] -> [PatKey] -> [PatKey] -> Map PatKey PatNode -> LZ.Zipper Inp -> LZ.Zipper Inp + runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp runPat kos oks todos pats inp = traceShow ( "runPat"::Text , ("kos"::Text) := kos @@ -764,7 +674,7 @@ parse initPats input = --traceShow ("runPat/[]"::Text) $ inp & (if null kos then id else runInp kos (PatEnd [])) - & runInp oks (PatNode pats) + & runInp oks (PatTree pats) k:ks -> case pats & Map.lookup k of -- the pattern ends @@ -775,7 +685,7 @@ parse initPats input = & (if null kos then id else runInp kos (PatEnd [])) & runInp (k:oks) (PatEnd end) -- the pattern advances - Just (PatNode nextPats) -> + Just (PatTree nextPats) -> --traceShow ("runPat/Node"::Text) $ inp & runPat kos (k:oks) ks nextPats -- the pattern does not advance @@ -783,33 +693,6 @@ parse initPats input = inp & runPat (k:kos) oks ks pats -} -data PatContext - = PatContextChar Char - | PatContextLexicalCategory Char.GeneralCategory - | PatContextLexicalBorder - | PatContextPhoneticVowel - | PatContextPhoneticSemiVowel - | PatContextPhoneticConsonant - deriving (Eq, Ord, Show) - -data Inp = Inp - { inpPats :: [PatKey] - , inpPronunciations :: Pronunciations - } - deriving (Show) - -data PatKey - = PatKeyPrev PatContext - | PatKeyNext PatContext - deriving (Eq, Ord, Show) -data PatNode - = PatNode (Map PatKey PatNode) - | PatEnd Pronunciations - deriving (Show) -instance IsList PatNode where - type Item PatNode = (PatKey, PatNode) - fromList = PatNode . Map.fromListWith (errorShow) - toList = errorShow data Lexeme = LexemeBorder diff --git a/tests/Language/EnglishSpec.hs b/tests/Language/EnglishSpec.hs index 478351b..f2e8232 100644 --- a/tests/Language/EnglishSpec.hs +++ b/tests/Language/EnglishSpec.hs @@ -418,7 +418,10 @@ pronunciationRules = , "up" := Rule{rulePron = ["up" := "ʌp"], ruleExamples = ["up" := "ʌp"]} , "utah" := Rule{rulePron = ["u" := "ˈjuː", "tah" := "ˌtɑː"], ruleExamples = ["utah" := "ˈjuː.ˌtɑː"]} , "veloci" := Rule{rulePron = ["ve" := "və", "lo" := "ˌlɒ", "ci" := "sɪ"], ruleExamples = ["velociraptor" := "və.ˌlɒ.sɪ.ˈɹæp.tɚ"]} + , "therizino" := Rule{rulePron = ["the":="ˌθɛ", "ri":="rə", "zi":="ˌzɪ", "no":="noʊ"], ruleExamples = ["therizinosaurus" := "ˌθɛ.rə.ˌzɪ.noʊ.ˈsɔː.ɹəs"]} + , "deinonychus" := Rule{rulePron = ["dei":="daɪ", "no":="ˈnɒ", "ny":="nɪ", "chus":="kəs"], ruleExamples = ["deinonychus" := "daɪ.ˈnɒ.nɪ.kəs"]} , "wait" := Rule{rulePron = ["wait" := "weɪt"], ruleExamples = ["wait" := "weɪt"]} + , "dreadnoughtus" := Rule{rulePron = ["dread" := "ˈdɹɛd", "nough" := "nɔː", "tus":="təs"], ruleExamples = ["dreadnoughtus" := "ˈdɹɛd.nɔː.təs"]} , "waits" := Rule{rulePron = ["waits" := "weɪts"], ruleExamples = ["waits" := "weɪts"]} , "walk" := Rule{rulePron = ["walk" := "wɔːk"], ruleExamples = ["walk" := "wɔːk"]} , "walks" := Rule{rulePron = ["walks" := "wɔːks"], ruleExamples = ["walks" := "wɔːks"]} diff --git a/tests/Language/FrenchSpec.hs b/tests/Language/FrenchSpec.hs index d56bfd2..13940b9 100644 --- a/tests/Language/FrenchSpec.hs +++ b/tests/Language/FrenchSpec.hs @@ -444,9 +444,15 @@ pronunciationRules = , "na" := Rule{rulePron = ["na" := "na"], ruleExamples = ["na" := "na"]} , "nette" := Rule{rulePron = ["nette" := "nɛt"], ruleExamples = ["lunette" := "ly.nɛt", "lunettes" := "ly.nɛt"]} , "niche" := Rule{rulePron = ["niche" := "niʃ"], ruleExamples = ["péniche" := "pe.niʃ"]} + --, "ny" := Rule{rulePron = ["ny" := "ni"], ruleExamples = ["nylon" := "ni.lɔ̃"]} + , "dei" := Rule{rulePron = ["de" := "de", "i":="i"], ruleExamples = ["deinonychus" := "de.i.nɔ.ni.kys"]} + , "nychus" := Rule{rulePron = ["ny":="ni", "chus" := "kys"], ruleExamples = ["deinonychus" := "de.i.nɔ.ni.kys"]} , "nnet" := Rule{rulePron = ["nnet" := "nɛ"], ruleExamples = ["bonnet" := "bɔ.nɛ"]} , "noc" := Rule{rulePron = ["noc" := "nɔk"], ruleExamples = ["nocturne" := "nɔk.tyʁn"]} , "nou" := Rule{rulePron = ["nou" := "nu"], ruleExamples = ["nourrit" := "nu.ʁi"]} + , "dread" := Rule{rulePron = ["dread" := "dʁɛd"], ruleExamples = ["dreadnought" := "dʁɛd.nɔt"]} + , "nought" := Rule{rulePron = ["nought" := "nɔt"], ruleExamples = ["dreadnought" := "dʁɛd.nɔt"]} + , "noughtus" := Rule{rulePron = ["nough" := "nɔ", "tus":="tys"], ruleExamples = ["dreadnoughtus" := "dʁɛd.nɔ.tys"]} , "mmouth" := Rule{rulePron = ["mmouth" := "mut"], ruleExamples = ["mammouth" := "ma.mut"]} , "o" := Rule{rulePron = ["o" := "ɔ"], ruleExamples = ["orange" := "ɔ.ʁɑ̃ʒ"]} , "ou" := Rule{rulePron = ["ou" := "u"], ruleExamples = ["ou" := "u"]} @@ -483,6 +489,7 @@ pronunciationRules = , "aigle" := Rule{rulePron = ["aigle" := "ɛgl"], ruleExamples = ["aigle" := "ɛgl"]} , "cor" <> [LexemeConsonant] := Rule{rulePron = ["cor" <> [LexemeConsonant] := "kɔʁ"], ruleExamples = ["corbeau" := "kɔʁ.bo"]} , "ri" := Rule{rulePron = ["ri" := "ʁi"], ruleExamples = ["rideau" := "ʁi.do"]} + , "zi" := Rule{rulePron = ["zi" := "zi"], ruleExamples = ["zizi" := "zi.zi"]} , "rhi" := Rule{rulePron = ["rhi" := "ʁi"], ruleExamples = ["rhinocéros" := "ʁi.nɔ.se.ʁɔs"]} , "no" := Rule{rulePron = ["no" := "nɔ"], ruleExamples = ["rhinocéros" := "ʁi.nɔ.se.ʁɔs"]} , "cé" := Rule{rulePron = ["cé" := "se"], ruleExamples = ["rhinocéros" := "ʁi.nɔ.se.ʁɔs"]} @@ -502,6 +509,7 @@ pronunciationRules = , "ré" := Rule{rulePron = ["ré" := "ʁe"], ruleExamples = ["révolution" := "ʁe.vɔ.ly.sjɔ̃"]} , "sa" := Rule{rulePron = ["sa" := "sa"], ruleExamples = ["satellite" := "sa.te.lit"]} , "sau" := Rule{rulePron = ["sau" := "sɔ"], ruleExamples = ["sauter" := "sɔ.te"]} + , "the" := Rule{rulePron = ["the":="tɛ"], ruleExamples = ["therizinosaurus" := "tɛ.ʁi.zi.nɔ.zɔ.ʁys"]} , [LexemeVowel] <> "sau" := Rule{rulePron = [[LexemeVowel] <> "sau" := "zɔ"], ruleExamples = ["dinosaurien" := "di.nɔ.zɔ.ʁjɛ̃"]} , [LexemeVowel] <> "saure" := Rule{rulePron = ["saure" := "zɔʁ"], ruleExamples = ["dinosaure" := "di.nɔ.zɔʁ"]} , begining "singe" := Rule{rulePron = [begining "singe" := "sɛ̃ʒ"], ruleExamples = ["singe" := "sɛ̃ʒ"]} diff --git a/tests/Spec.hs b/tests/Spec.hs index 9fbdb90..ac6b70c 100644 --- a/tests/Spec.hs +++ b/tests/Spec.hs @@ -12,15 +12,17 @@ import Rosetta.ReadingSpec qualified import Rosetta.WritingSpec qualified import WiktionarySpec qualified import Worksheets.Utils.Prelude +import Utils.Pronunciation qualified main :: IO () main = sydTest spec spec = do + Utils.Pronunciation.spec -- RecipesSpec.spec -- MathSpec.spec - WiktionarySpec.spec + --WiktionarySpec.spec -- xdescribe "Language" do -- describe "Chinese" do -- Language.ChineseSpec.spec @@ -31,7 +33,7 @@ spec = do Language.EnglishSpec.spec describe "FrenchSpec" do Language.FrenchSpec.spec - -- + withoutRetries do describe "Rosetta" do describe "ReadingSpec" do -- 2.49.0