{-# LANGUAGE OverloadedLists #-} module Utils.Pronunciation where import Control.Monad.Trans.Class qualified as MT import Control.Monad.Trans.State qualified as MT import Data.ByteString.Builder qualified as ByteString.Builder import Data.Char qualified as Char import Data.GenValidity.Map () import Data.GenValidity.Sequence () import Data.GenValidity.Set () import Data.GenValidity.Text () import Data.List qualified as List import Data.List.Zipper qualified as LZ import Data.Map.Strict qualified as Map import Data.Text qualified as Text import Data.Text.Encoding qualified as Text import Data.Text.Lazy qualified as Text.Lazy import Data.Text.Lazy.Encoding qualified as Text.Lazy import Data.Text.Short qualified as TextShort import Data.Validity.Map () import Data.Validity.Set () import Data.Validity.Text () import Language.Pronunciation (Pronunciation (..), Pronunciations (..)) import Language.Pronunciation qualified as Pron import Test.Syd import Utils.Tests import Worksheets.Utils.HTML (Html) import Worksheets.Utils.HTML qualified as HTML import Worksheets.Utils.IPA qualified as IPA import Worksheets.Utils.Prelude cardsHtml :: String -> IO Html -> TestDefM (outers) () () cardsHtml title html = do outPath <- goldenPath title "html" builder <- html <&> HTML.renderMarkupBuilder & liftIO it title do goldenByteStringBuilderFile outPath (return builder) lexerRender rules = do let title = "lexer" outPath <- goldenPath title "html" builder <- rules & Pron.tableHtml <&> HTML.renderMarkupBuilder & liftIO it title do goldenByteStringBuilderFile outPath (return builder) pronunciationDecompositionTest :: Pron.Table -> String -> TestDefM (outers) () () pronunciationDecompositionTest rules sentence = do outPath <- goldenPath sentence "txt" it sentence do pureGoldenByteStringBuilderFile outPath $ case sentence & Text.pack & Pron.run rules of Right x -> x & foldMap ( \case Left c -> [Text.singleton c, "\n"] & mconcat Right Pron.Pron{pronInput, pronRule} -> [ pronInput & Pron.lexemesChars & Text.pack , " → " , pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText) , "\n" ] & mconcat ) & Text.encodeUtf8 & ByteString.Builder.byteString err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString {- pronunciationResultTest :: Pron.Table -> String -> TestDefM (outers) () () pronunciationResultTest rules sentence = do outPath <- goldenPath sentence "txt" it sentence do pureGoldenByteStringBuilderFile outPath $ case sentence & Text.pack & Pron.run rules of Right x -> x & foldMap \Pron.Pron{pronInput, pronRule} -> pronInput & Text.encodeUtf8 & ByteString.Builder.byteString err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString -} pronunciationParserTest :: Pron.Table -> Map Pron.InputLexemes Pron.Pronunciation -> TestDefM (outers) () () pronunciationParserTest rules sentences = do forM_ (sentences & Map.toList) \(inp, exp :: Pron.Pronunciation) -> do it (inp & Pron.unInputLexemes & Pron.lexemesChars & show) do let got :: Pron.Pronunciation = inp & Pron.unInputLexemes & Pron.runParser rules & either errorShow id & foldMap ( either ( \c -> if Char.isLetter c then errorShow ("missing phonetic for" :: Text, c, "in" :: Text, inp) else Pron.Pronunciations [ "" := Pron.Pronunciation { pronunciationIPABroad = [] , pronunciationText = Text.singleton c } ] ) (Pron.pronRule >>> Pron.rulePron) ) & Pron.joinPronunciations & Pron.unPronunciations & List.head & snd Pron.pronunciationText got `shouldBe` Pron.pronunciationText exp {- let baseName = sentence & foldMap \Pron.ExampleLiteral{..} -> [ [TextShort.unpack exampleLiteralText] , if TextShort.null exampleLiteralMeaning then [] else ["{", exampleLiteralMeaning & TextShort.unpack, "}"] ] & mconcat & mconcat outPath <- goldenPath baseName "txt" it baseName do pureGoldenByteStringBuilderFile outPath $ case sentence & Pron.parseLiterals rules of Right x -> x & foldMap ( \case Left c -> [Text.singleton c, "\n"] & mconcat Right Pron.Pron{pronInput, pronRule} -> -- Chunk{chunkInterval = trans, chunkMachine = Pron.Machine{machinePron = pron}} [ pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText) , " ← " , show pronInput & Text.pack , "\n" ] & mconcat ) & Text.encodeUtf8 & ByteString.Builder.byteString err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString -} data Lex = LexChar Char | LexBorder deriving (Eq, Show) data State = State { statePrev :: [(Lex, [Char])] , stateNext :: [Lex] } deriving (Show) readFrench :: [Char] -> [(Lex, [Char])] readFrench inp = go State{statePrev = [], stateNext = (LexChar <$> inp) <> [LexBorder]} where goMany fts State{statePrev, stateNext = n : next} | let stack = n : (statePrev <&> fst) , stack & List.isPrefixOf (fts <&> fst & List.reverse) = -- \|| traceShow (stack, fts) False Just State{statePrev = List.reverse fts <> List.drop (List.length fts - 1) statePrev, stateNext = next} goMany _ _ = Nothing go State{statePrev, stateNext = []} = statePrev & List.reverse go (goMany [LexChar 'C' := "s", LexChar 'E' := "ə"] -> Just st) = go st go (goMany [LexChar 'C' := "", LexChar 'H' := "ʃ"] -> Just st) = go st go (goMany [LexChar 'S' := "", LexChar 'S' := "s"] -> Just st) = go st go (goMany [LexChar 'A' := "", LexChar 'N' := "ɑ̃"] -> Just st) = go st go (goMany [LexChar 'A' := "", LexChar 'U' := "o"] -> Just st) = go st go (goMany [LexChar 'E' := "", LexChar 'I' := "ɛ"] -> Just st) = go st go ( goMany [ LexChar 'E' := "" , LexChar 'I' := "ɛ" , LexChar 'L' := "" , LexChar 'L' := "" , LexChar 'E' := "j" , LexBorder := "" ] -> Just st ) = go st go (goMany [LexChar 'T' := "", LexChar 'T' := "t"] -> Just st) = go st go (goMany [LexChar 'E' := "", LexChar 'T' := "t"] -> Just st) = go st go (goMany [LexChar 'E' := "", LexBorder := ""] -> Just st) = go st go (goMany [LexBorder := ""] -> Just st) = go st go (goMany [LexChar 'A' := "a"] -> Just st) = go st go (goMany [LexChar 'B' := "b"] -> Just st) = go st go (goMany [LexChar 'C' := "k"] -> Just st) = go st go (goMany [LexChar 'H' := ""] -> Just st) = go st go (goMany [LexChar 'I' := "i"] -> Just st) = go st go (goMany [LexChar 'L' := "l"] -> Just st) = go st go (goMany [LexChar 'O' := "ɔ"] -> Just st) = go st go (goMany [LexChar 'T' := "t"] -> Just st) = go st go (goMany [LexChar 'N' := "n"] -> Just st) = go st go (goMany [LexChar 'R' := "ʁ"] -> Just st) = go st go (goMany [LexChar 'S' := "s"] -> Just st) = go st go (goMany [LexChar 'E' := "ə"] -> Just st) = go st go (goMany [LexChar 'É' := "e"] -> Just st) = go st go (goMany [LexChar 'U' := "y"] -> Just st) = go st go (goMany [LexChar 'V' := "v"] -> Just st) = go st go st = errorShow st {- go (('C' := _):prev) (n@'E':next) = go ((n := "ə"):('C' := "s"):prev) next go (('S' := _):prev) ('S':next) = go (('S' := ""):('S' := "s"):prev) next go (((isVowel->True) := _):prev) ('S':next@((isVowel->True):_)) = go (('S' := "z"):prev) next go prev next = errorShow (prev, next) -} isConsonant = isVowel >>> not isVowel 'A' = True isVowel 'E' = True isVowel 'É' = True isVowel 'È' = True isVowel 'Ê' = True isVowel 'I' = True isVowel 'O' = True isVowel 'U' = True isVowel 'Y' = True isVowel _ = False spec = do describe "Pronunciation" do describe "test" do it "ABEILLE" do readFrench "ABEILLE" `shouldBe` [ LexChar 'A' := "a" , LexChar 'B' := "b" , LexChar 'E' := "" , LexChar 'I' := "ɛ" , LexChar 'L' := "" , LexChar 'L' := "" , LexChar 'E' := "j" , LexBorder := "" ] it "ASSURANCE" do readFrench "ASSURANCE" `shouldBe` [ LexChar 'A' := "a" , LexChar 'S' := "" , LexChar 'S' := "s" , LexChar 'U' := "y" , LexChar 'R' := "ʁ" , LexChar 'A' := "" , LexChar 'N' := "ɑ̃" , LexChar 'C' := "s" , LexChar 'E' := "" , LexBorder := "" ] it "CHEVAUCHE" do readFrench "CHEVAUCHE" `shouldBe` [ LexChar 'C' := "" , LexChar 'H' := "ʃ" , LexChar 'E' := "ə" , LexChar 'V' := "v" , LexChar 'A' := "" , LexChar 'U' := "o" , LexChar 'C' := "" , LexChar 'H' := "ʃ" , LexChar 'E' := "" , LexBorder := "" ] it "NÉCESSAIRE" do readFrench "NÉCESSAIRE" `shouldBe` [ LexChar 'N' := "n" , LexChar 'É' := "e" , LexChar 'C' := "s" , LexChar 'E' := "ə" , LexChar 'S' := "" , LexChar 'S' := "s" , LexChar 'A' := "a" , LexChar 'I' := "i" , LexChar 'R' := "ʁ" , LexChar 'E' := "" , LexBorder := "" ] -- () `shouldBe` () -- buildAna "a" `shouldBe` Ana [ "a" := ["a"] ] -- buildAna "as" `shouldBe` Ana [ "as" := ["az"] ] -- buildAna "ass" `shouldBe` Ana [ "ass" := ["as"] ] it "CAROTTE" do readFrench "CAROTTE" `shouldBe` [ LexChar 'C' := "k" , LexChar 'A' := "a" , LexChar 'R' := "ʁ" , LexChar 'O' := "ɔ" , LexChar 'T' := "" , LexChar 'T' := "t" , LexChar 'E' := "" , LexBorder := "" ] -- () `shouldBe` () -- buildAna "a" `shouldBe` Ana [ "a" := ["a"] ] -- buildAna "as" `shouldBe` Ana [ "as" := ["az"] ] -- buildAna "ass" `shouldBe` Ana [ "ass" := ["as"] ] -- it "assurance" do -- ana_assurance `shouldBe` ana_assurance -- outPath <- goldenPath "1" "txt" -- it "1" do -- pureGoldenByteStringBuilderFile outPath $ -- mergeFrench -- & pShowNoColor -- & Text.Lazy.encodeUtf8 -- & ByteString.Builder.lazyByteString {- describe "sentence" do forM_ sentences \sentence -> do outPath <- goldenPath sentence "txt" it sentence do pureGoldenByteStringBuilderFile outPath $ case sentence & Text.pack & parse patterns of err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString Right x -> x & pShowNoColor & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString {- x & foldMap ( \case Left c -> [Text.singleton c, "\n"] & mconcat Right Pron.Pron{pronInput, pronRule} -> [ pronInput & Pron.lexemesChars & Text.pack , " → " , pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText) , "\n" ] & mconcat ) & Text.encodeUtf8 & ByteString.Builder.byteString -} -} sentences :: [String] sentences = [ "assurance" , "exercice" , "nécessaire" ] type (:=) a b = (a, b) data Inp = InpChar Char | InpBorder ana1 :: [Inp] -> [Inp := [[Inp] := [IPA.Syllable []]]] ana1 = go where go :: [Inp] -> [Inp := [[Inp] := [IPA.Syllable []]]] go [] = [] go (i@(InpChar 'a') : next) = [i := [[i] := ["a"]]] <> go next {- data Dap k a = Dap { dapDefault :: a, dap :: Map k a } type Phon = [IPA.Syllable []] type CaseNextString = Dap String CasePrevPhon type CasePrevPhon = Dap CasePhon CaseNextPhon type CaseNextPhon = Dap CasePhon Phon cases :: CaseNextString cases = [ "a" := Dap ["a"] [] , "au" := Dap ["o"] [] , "ay" := Dap ["ɛ"] [] , "ai" := Dap ["ɛ"] [] , "c" := Dap [] -- , "ch" := -- , "s" := -- , "ss" := ] -- a := ["a"] type Phon = IPA.Syllable [] type AnaMap = ([Char], [Phon]) data Ana = Ana { anaDecomp :: [AnaMap] } deriving (Eq, Show) -- assurance -- a | a | a = a -- as | az | vowel + s = z -- ass | as | s + s = s ana_assurance = Ana [ "a" := ["a."] , "ss" := ["s"] , "u" := ["y."] , "r" := ["ʁ"] , "an" := ["ɑ̃"] , "ce" := ["s."] ] ana_exercice = Ana [ "e" := ["ɛ"] , "x" := ["g", "z"] , "er" := ["ɛʁ"] , "cice" := ["sis"] ] ana_nécessaire = Ana [ "né" := ["ne"] , "ce" := ["sé"] , "ssaire" := ["sɛʁ"] ] buildAna :: String -> Ana buildAna = List.foldl' (\acc c -> appendAna acc (InpChar c)) (Ana []) appendAna :: Ana -> Inp -> Ana appendAna (Ana []) (InpChar c) = case c of 'a' -> Ana [ "a" := ["a"] ] 'b' -> Ana [ "b" := ["be"] ] 'c' -> Ana [ "c" := ["se"] ] 'd' -> Ana [ "d" := ["de"] ] 'e' -> Ana [ "e" := ["ə"] ] 'f' -> Ana [ "f" := ["ɛf"] ] 'g' -> Ana [ "g" := ["g"] ] 'h' -> Ana [ "h" := ["aʃ"] ] 'i' -> Ana [ "i" := ["i"] ] 'j' -> Ana [ "j" := ["ʒi"] ] 'k' -> Ana [ "k" := ["ka"] ] 'l' -> Ana [ "l" := ["ɛl"] ] 'm' -> Ana [ "m" := ["ɛm"] ] 'n' -> Ana [ "n" := ["ɛn"] ] 'o' -> Ana [ "o" := ["o"] ] 'p' -> Ana [ "p" := ["pe"] ] 'q' -> Ana [ "q" := ["ky"] ] 'r' -> Ana [ "r" := ["ɛʁ"] ] 's' -> Ana [ "s" := ["ɛs"] ] 't' -> Ana [ "t" := ["te"] ] 'u' -> Ana [ "u" := ["y"] ] 'v' -> Ana [ "v" := ["ve"] ] 'w' -> Ana [ "w" := ["dubl", "ve"] ] 'x' -> Ana [ "x" := ["ɛks"] ] 'y' -> Ana [ "y" := ["igʁɛk"] ] 'z' -> Ana [ "z" := ["zɛd"] ] _ -> errorShow c appendAna (anaPrevPhonSeg -> Just IPA.Vowel{}) (InpChar 's') = (Ana ["as":=["az"]]) appendAna ana (InpChar 's') | ana & anaPrevSuffix "s" = ana & anaPrevMap \case (List.isSuffixOf "s" -> True) := _ -> "ss" := ["s"] appendAna x y = errorShow (x, y) anaPrev (Ana xs) = xs & lastMaybe anaPrevMap :: (AnaMap -> AnaMap) -> Ana -> Ana anaPrevMap f (Ana l) = case l & List.reverse of x : xs -> Ana $ (f x : xs) & List.reverse [] -> Ana [] anaPrevSuffix :: [Char] -> Ana -> Bool anaPrevSuffix suf ana = ana & anaPrev & \case Just (cs := _) -> cs & List.isSuffixOf suf _ -> False anaPrevPhonSeg x = x & anaPrev >>= \case _ := phon | Just lastPhon <- phon & lastMaybe , segs <- lastPhon & IPA.syllableToSegments , Just lastSeg <- segs & lastMaybe <&> IPA.dropSegmentalFeatures -> Just lastSeg _ -> Nothing data Input = Input { inputText :: Text , inputPhonetic :: [IPA.Syllable []] , inputMeaning :: Maybe ShortText } data Pos = PosPrev | PosNext deriving (Eq, Ord, Show) data Step = Step { stepPos :: Pos , stepPat :: Pat } deriving (Eq, Ord, Show) data PatTree = PatTreeBranches (Map Step PatTree) | PatTreeLeaf Pronunciations deriving (Show) instance IsList PatTree where type Item PatTree = (Step, PatTree) fromList = PatTreeBranches . Map.fromListWith (errorShow) toList = errorShow data Action = Action { actionRead :: Pat , actionAfter :: Pat } data Parse = ParseReadChar (Map Char Parse) | ParsePhon [IPA.Syllable []] | ParseLook Parse | ParseLookBorder | ParseAhead Parse data Mol = Mol { molLangues :: Set String , molRead :: String , molReading :: String , molPhoneticPrepend :: Map MolPat [IPA.Syllable []] } data MolPat = MolPatChars String | MolPatVowel | MolPatConsonant | MolPatSemivowel deriving (Eq, Ord, Show) data Chunk = Chunk { chunkLangue :: [String] , chunkLetters :: String , chunkPhonetic :: [IPA.Syllable []] } deriving (Eq, Ord, Show) mol_french :: [Mol] mol_french = [ mol_french_s ] mol_french_s = Mol { molLangues = ["french"] , molRead = "" , molReading = "s" , molPhoneticPrepend = [ MolPatChars "" := ["ɛs"] , MolPatChars "s" := ["s"] ] } -- assurances -> a.ssu.rances -- -- a + s = /s -- s + s = /s mergeMol :: Mol -> Mol -> Mol mergeMol x y = x french = Chunk { chunkLangue = ["french"] , chunkLetters = [] , chunkPhonetic = [] } -- exposeChars :: [Char] -- exposePhons :: [IPA.Syllables []] data Merge = Merge Chunk Chunk Chunk deriving (Eq, Ord, Show) runMerge :: [Merge] -> Chunk -> Chunk -> Chunk runMerge ms x y = x mergeFrench :: [Merge] mergeFrench = [ merge french_a french_n ["ɑ̃"] ] where merge x y chunkPhonetic = Merge x y Chunk { chunkLangue = chunkLangue x , chunkLetters = chunkLetters x <> chunkLetters y , chunkPhonetic } -- Char + Char = -- Vowel + Char = -- Char + Vowel = -- Consonant + Char = -- Char + Consonant = french_a = french { chunkLetters = "a", chunkPhonetic = ["a"] } french_b = french { chunkLetters = "b", chunkPhonetic = ["be"] } french_c = french { chunkLetters = "c", chunkPhonetic = ["se"] } french_d = french { chunkLetters = "d", chunkPhonetic = ["de"] } french_e = french { chunkLetters = "e", chunkPhonetic = ["ə"] } french_f = french { chunkLetters = "f", chunkPhonetic = ["ɛf"] } french_g = french { chunkLetters = "g", chunkPhonetic = ["g"] } french_h = french { chunkLetters = "h", chunkPhonetic = ["aʃ"] } french_i = french { chunkLetters = "i", chunkPhonetic = ["i"] } french_j = french { chunkLetters = "j", chunkPhonetic = ["ʒi"] } french_k = french { chunkLetters = "k", chunkPhonetic = ["ka"] } french_l = french { chunkLetters = "l", chunkPhonetic = ["ɛl"] } french_m = french { chunkLetters = "m", chunkPhonetic = ["ɛm"] } french_n = french { chunkLetters = "n", chunkPhonetic = ["ɛn"] } french_o = french { chunkLetters = "o", chunkPhonetic = ["o"] } french_p = french { chunkLetters = "p", chunkPhonetic = ["pe"] } french_q = french { chunkLetters = "q", chunkPhonetic = ["ky"] } french_r = french { chunkLetters = "r", chunkPhonetic = ["ɛʁ"] } french_s = french { chunkLetters = "s", chunkPhonetic = ["ɛs"] } french_t = french { chunkLetters = "t", chunkPhonetic = ["te"] } french_u = french { chunkLetters = "u", chunkPhonetic = ["y"] } french_v = french { chunkLetters = "v", chunkPhonetic = ["ve"] } french_w = french { chunkLetters = "w", chunkPhonetic = ["dubl", "ve"] } french_x = french { chunkLetters = "x", chunkPhonetic = ["ɛks"] } french_y = french { chunkLetters = "y", chunkPhonetic = ["igʁɛk"] } french_z = french { chunkLetters = "z", chunkPhonetic = ["zɛd"] } type Patterns = Map Step PatTree patterns :: Map Step PatTree patterns = [ Step PosNext (PatChar 'a') := [ Step PosNext PatLexicalBorder := PatTreeLeaf ["a" := "ə"] ] , Step PosNext (PatChar 't') := [ Step PosNext (PatChar 'h') := [ Step PosNext (PatChar 'e') := [ Step PosNext (PatLexicalCategory Char.Space) := PatTreeLeaf ["the" := "zi"] ] ] ] ] data State = State { stateInput :: LZ.Zipper Inp , stateBuffer :: [Step] , statePats :: Patterns , statePatReset :: Bool } data Pat = PatChar Char | PatLexicalCategory Char.GeneralCategory | PatLexicalBorder | PatPhoneticVowel | PatPhoneticSemiVowel | PatPhoneticConsonant deriving (Eq, Ord, Show) data Inp = Inp { inpSteps :: [Step] , inpPronunciations :: Pronunciations } deriving (Show) parse :: Patterns -> Text -> Either String [Inp] parse initPats input = loop State { stateInput = input & Text.unpack & fmap charToInp & LZ.fromList , stateBuffer = [] , statePats = initPats , statePatReset = True } & stateInput & LZ.toList & Right where charToInp :: Char -> Inp charToInp c = Inp { inpSteps = [Step { stepPos = PosNext, stepPat = PatChar c }] , inpPronunciations = [] } loop :: State -> State loop st = [ look (key, val) st | (key, val) <- st & statePats & Map.toList ] & catMaybes & headMaybe & fromMaybe (loop st{statePats = initPats}) look :: (Step, PatTree) -> State -> Maybe State look kv@(key, val) st = case key of Step{stepPos = PosPrev, stepPat = patPrev} -> errorShow ("prev" :: Text) Step{stepPos = PosNext, stepPat = patNext} -> case patNext of PatLexicalBorder | stateInput st & LZ.endp -> match kv st -- PatChar c -- | Just inpNext <- stateInput st & LZ.safeCursor -> -- inpNext & inpSteps & -- case of -- Nothing -> _ -> Nothing match kv@(key, val) st = case val of PatTreeLeaf pron -> Just st { statePats = initPats , stateBuffer = [] , stateInput = stateInput st & LZ.insert Inp { inpSteps = stateBuffer st & (key :) & List.reverse , inpPronunciations = pron } } PatTreeBranches pats -> Just st { statePats = pats , stateBuffer = key : stateBuffer st } -}