tests/Utils/Pronunciation.hs

   1 {-# LANGUAGE OverloadedLists #-}
   2
   3 module Utils.Pronunciation where
   4
   5 import Control.Monad.Trans.Class qualified as MT
   6 import Control.Monad.Trans.State qualified as MT
   7 import Data.ByteString.Builder qualified as ByteString.Builder
   8 import Data.Char qualified as Char
   9 import Data.GenValidity.Map ()
  10 import Data.GenValidity.Sequence ()
  11 import Data.GenValidity.Set ()
  12 import Data.GenValidity.Text ()
  13 import Data.List qualified as List
  14 import Data.List.Zipper qualified as LZ
  15 import Data.Map.Strict qualified as Map
  16 import Data.Text qualified as Text
  17 import Data.Text.Encoding qualified as Text
  18 import Data.Text.Lazy qualified as Text.Lazy
  19 import Data.Text.Lazy.Encoding qualified as Text.Lazy
  20 import Data.Text.Short qualified as TextShort
  21 import Data.Validity.Map ()
  22 import Data.Validity.Set ()
  23 import Data.Validity.Text ()
  24 import Language.Pronunciation (Pronunciation (..), Pronunciations (..))
  25 import Language.Pronunciation qualified as Pron
  26 import Test.Syd
  27 import Utils.Tests
  28 import Worksheets.Utils.HTML (Html)
  29 import Worksheets.Utils.HTML qualified as HTML
  30 import Worksheets.Utils.IPA qualified as IPA
  31 import Worksheets.Utils.Prelude
  32
  33 cardsHtml :: String -> IO Html -> TestDefM (outers) () ()
  34 cardsHtml title html = do
  35   outPath <- goldenPath title "html"
  36   builder <- html <&> HTML.renderMarkupBuilder & liftIO
  37   it title do
  38     goldenByteStringBuilderFile outPath (return builder)
  39
  40 lexerRender rules = do
  41   let title = "lexer"
  42   outPath <- goldenPath title "html"
  43   builder <- rules & Pron.tableHtml <&> HTML.renderMarkupBuilder & liftIO
  44   it title do
  45     goldenByteStringBuilderFile outPath (return builder)
  46
  47 pronunciationDecompositionTest :: Pron.Table -> String -> TestDefM (outers) () ()
  48 pronunciationDecompositionTest rules sentence = do
  49   outPath <- goldenPath sentence "txt"
  50   it sentence do
  51     pureGoldenByteStringBuilderFile outPath $
  52       case sentence & Text.pack & Pron.run rules of
  53         Right x ->
  54           x
  55             & foldMap
  56               ( \case
  57                   Left c -> [Text.singleton c, "\n"] & mconcat
  58                   Right Pron.Pron{pronInput, pronRule} ->
  59                     [ pronInput & Pron.lexemesChars & Text.pack
  60                     , " → "
  61                     , pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText)
  62                     , "\n"
  63                     ]
  64                       & mconcat
  65               )
  66             & Text.encodeUtf8
  67             & ByteString.Builder.byteString
  68         err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
  69
  70 {-
  71 pronunciationResultTest :: Pron.Table -> String -> TestDefM (outers) () ()
  72 pronunciationResultTest rules sentence = do
  73   outPath <- goldenPath sentence "txt"
  74   it sentence do
  75     pureGoldenByteStringBuilderFile outPath $
  76       case sentence & Text.pack & Pron.run rules of
  77         Right x ->
  78           x & foldMap \Pron.Pron{pronInput, pronRule} ->
  79             pronInput
  80               & Text.encodeUtf8
  81               & ByteString.Builder.byteString
  82         err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
  83
  84 -}
  85 pronunciationParserTest ::
  86   Pron.Table ->
  87   Map Pron.InputLexemes Pron.Pronunciation ->
  88   TestDefM (outers) () ()
  89 pronunciationParserTest rules sentences = do
  90   forM_ (sentences & Map.toList) \(inp, exp :: Pron.Pronunciation) -> do
  91     it (inp & Pron.unInputLexemes & Pron.lexemesChars & show) do
  92       let got :: Pron.Pronunciation =
  93             inp
  94               & Pron.unInputLexemes
  95               & Pron.runParser rules
  96               & either errorShow id
  97               & foldMap
  98                 ( either
  99                     ( \c ->
 100                         if Char.isLetter c
 101                           then errorShow ("missing phonetic for" :: Text, c, "in" :: Text, inp)
 102                           else
 103                             Pron.Pronunciations
 104                               [ "" :=
 105                                   Pron.Pronunciation
 106                                     { pronunciationIPABroad = []
 107                                     , pronunciationText = Text.singleton c
 108                                     }
 109                               ]
 110                     )
 111                     (Pron.pronRule >>> Pron.rulePron)
 112                 )
 113               & Pron.joinPronunciations
 114               & Pron.unPronunciations
 115               & List.head
 116               & snd
 117       Pron.pronunciationText got
 118         `shouldBe` Pron.pronunciationText exp
 119
 120 {-
 121 let baseName =
 122       sentence
 123         & foldMap \Pron.ExampleLiteral{..} ->
 124           [ [TextShort.unpack exampleLiteralText]
 125           , if TextShort.null exampleLiteralMeaning
 126               then []
 127               else ["{", exampleLiteralMeaning & TextShort.unpack, "}"]
 128           ]
 129             & mconcat
 130             & mconcat
 131 outPath <- goldenPath baseName "txt"
 132 it baseName do
 133   pureGoldenByteStringBuilderFile outPath $
 134     case sentence & Pron.parseLiterals rules of
 135       Right x ->
 136         x
 137           & foldMap
 138             ( \case
 139                 Left c -> [Text.singleton c, "\n"] & mconcat
 140                 Right Pron.Pron{pronInput, pronRule} ->
 141                   -- Chunk{chunkInterval = trans, chunkMachine = Pron.Machine{machinePron = pron}}
 142                   [ pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText)
 143                   , " ← "
 144                   , show pronInput & Text.pack
 145                   , "\n"
 146                   ]
 147                     & mconcat
 148             )
 149           & Text.encodeUtf8
 150           & ByteString.Builder.byteString
 151       err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
 152 -}
 153
 154 data Lex = LexChar Char | LexBorder
 155   deriving (Eq, Show)
 156
 157 data State = State
 158   { statePrev :: [(Lex, [Char])]
 159   , stateNext :: [Lex]
 160   }
 161   deriving (Show)
 162
 163 readFrench :: [Char] -> [(Lex, [Char])]
 164 readFrench inp = go State{statePrev = [], stateNext = (LexChar <$> inp) <> [LexBorder]}
 165   where
 166     goMany fts State{statePrev, stateNext = n : next}
 167       | let stack = n : (statePrev <&> fst)
 168       , stack & List.isPrefixOf (fts <&> fst & List.reverse) =
 169           -- \|| traceShow (stack, fts) False
 170           Just State{statePrev = List.reverse fts <> List.drop (List.length fts - 1) statePrev, stateNext = next}
 171     goMany _ _ = Nothing
 172
 173     go State{statePrev, stateNext = []} = statePrev & List.reverse
 174     go (goMany [LexChar 'C' := "s", LexChar 'E' := "ə"] -> Just st) = go st
 175     go (goMany [LexChar 'C' := "", LexChar 'H' := "ʃ"] -> Just st) = go st
 176     go (goMany [LexChar 'S' := "", LexChar 'S' := "s"] -> Just st) = go st
 177     go (goMany [LexChar 'A' := "", LexChar 'N' := "ɑ̃"] -> Just st) = go st
 178     go (goMany [LexChar 'A' := "", LexChar 'U' := "o"] -> Just st) = go st
 179     go (goMany [LexChar 'E' := "", LexChar 'I' := "ɛ"] -> Just st) = go st
 180     go
 181       ( goMany
 182           [ LexChar 'E' := ""
 183           , LexChar 'I' := "ɛ"
 184           , LexChar 'L' := ""
 185           , LexChar 'L' := ""
 186           , LexChar 'E' := "j"
 187           , LexBorder := ""
 188           ] ->
 189           Just st
 190         ) = go st
 191     go (goMany [LexChar 'T' := "", LexChar 'T' := "t"] -> Just st) = go st
 192     go (goMany [LexChar 'E' := "", LexChar 'T' := "t"] -> Just st) = go st
 193     go (goMany [LexChar 'E' := "", LexBorder := ""] -> Just st) = go st
 194     go (goMany [LexBorder := ""] -> Just st) = go st
 195     go (goMany [LexChar 'A' := "a"] -> Just st) = go st
 196     go (goMany [LexChar 'B' := "b"] -> Just st) = go st
 197     go (goMany [LexChar 'C' := "k"] -> Just st) = go st
 198     go (goMany [LexChar 'H' := ""] -> Just st) = go st
 199     go (goMany [LexChar 'I' := "i"] -> Just st) = go st
 200     go (goMany [LexChar 'L' := "l"] -> Just st) = go st
 201     go (goMany [LexChar 'O' := "ɔ"] -> Just st) = go st
 202     go (goMany [LexChar 'T' := "t"] -> Just st) = go st
 203     go (goMany [LexChar 'N' := "n"] -> Just st) = go st
 204     go (goMany [LexChar 'R' := "ʁ"] -> Just st) = go st
 205     go (goMany [LexChar 'S' := "s"] -> Just st) = go st
 206     go (goMany [LexChar 'E' := "ə"] -> Just st) = go st
 207     go (goMany [LexChar 'É' := "e"] -> Just st) = go st
 208     go (goMany [LexChar 'U' := "y"] -> Just st) = go st
 209     go (goMany [LexChar 'V' := "v"] -> Just st) = go st
 210     go st = errorShow st
 211
 212 {-
 213 go (('C' := _):prev) (n@'E':next) = go ((n := "ə"):('C' := "s"):prev) next
 214 go (('S' := _):prev) ('S':next) = go (('S' := ""):('S' := "s"):prev) next
 215 go (((isVowel->True) := _):prev) ('S':next@((isVowel->True):_)) = go (('S' := "z"):prev) next
 216 go prev next = errorShow (prev, next)
 217 -}
 218
 219 isConsonant = isVowel >>> not
 220 isVowel 'A' = True
 221 isVowel 'E' = True
 222 isVowel 'É' = True
 223 isVowel 'È' = True
 224 isVowel 'Ê' = True
 225 isVowel 'I' = True
 226 isVowel 'O' = True
 227 isVowel 'U' = True
 228 isVowel 'Y' = True
 229 isVowel _ = False
 230
 231 spec = do
 232   describe "Pronunciation" do
 233     describe "test" do
 234       it "ABEILLE" do
 235         readFrench "ABEILLE"
 236           `shouldBe` [ LexChar 'A' := "a"
 237                      , LexChar 'B' := "b"
 238                      , LexChar 'E' := ""
 239                      , LexChar 'I' := "ɛ"
 240                      , LexChar 'L' := ""
 241                      , LexChar 'L' := ""
 242                      , LexChar 'E' := "j"
 243                      , LexBorder := ""
 244                      ]
 245       it "ASSURANCE" do
 246         readFrench "ASSURANCE"
 247           `shouldBe` [ LexChar 'A' := "a"
 248                      , LexChar 'S' := ""
 249                      , LexChar 'S' := "s"
 250                      , LexChar 'U' := "y"
 251                      , LexChar 'R' := "ʁ"
 252                      , LexChar 'A' := ""
 253                      , LexChar 'N' := "ɑ̃"
 254                      , LexChar 'C' := "s"
 255                      , LexChar 'E' := ""
 256                      , LexBorder := ""
 257                      ]
 258       it "CHEVAUCHE" do
 259         readFrench "CHEVAUCHE"
 260           `shouldBe` [ LexChar 'C' := ""
 261                      , LexChar 'H' := "ʃ"
 262                      , LexChar 'E' := "ə"
 263                      , LexChar 'V' := "v"
 264                      , LexChar 'A' := ""
 265                      , LexChar 'U' := "o"
 266                      , LexChar 'C' := ""
 267                      , LexChar 'H' := "ʃ"
 268                      , LexChar 'E' := ""
 269                      , LexBorder := ""
 270                      ]
 271       it "NÉCESSAIRE" do
 272         readFrench "NÉCESSAIRE"
 273           `shouldBe` [ LexChar 'N' := "n"
 274                      , LexChar 'É' := "e"
 275                      , LexChar 'C' := "s"
 276                      , LexChar 'E' := "ə"
 277                      , LexChar 'S' := ""
 278                      , LexChar 'S' := "s"
 279                      , LexChar 'A' := "a"
 280                      , LexChar 'I' := "i"
 281                      , LexChar 'R' := "ʁ"
 282                      , LexChar 'E' := ""
 283                      , LexBorder := ""
 284                      ]
 285
 286       -- () `shouldBe` ()
 287       -- buildAna "a" `shouldBe` Ana [ "a" := ["a"] ]
 288       -- buildAna "as" `shouldBe` Ana [ "as" := ["az"] ]
 289       -- buildAna "ass" `shouldBe` Ana [ "ass" := ["as"] ]
 290       it "CAROTTE" do
 291         readFrench "CAROTTE"
 292           `shouldBe` [ LexChar 'C' := "k"
 293                      , LexChar 'A' := "a"
 294                      , LexChar 'R' := "ʁ"
 295                      , LexChar 'O' := "ɔ"
 296                      , LexChar 'T' := ""
 297                      , LexChar 'T' := "t"
 298                      , LexChar 'E' := ""
 299                      , LexBorder := ""
 300                      ]
 301
 302 -- () `shouldBe` ()
 303 -- buildAna "a" `shouldBe` Ana [ "a" := ["a"] ]
 304 -- buildAna "as" `shouldBe` Ana [ "as" := ["az"] ]
 305 -- buildAna "ass" `shouldBe` Ana [ "ass" := ["as"] ]
 306 -- it "assurance" do
 307 --  ana_assurance `shouldBe` ana_assurance
 308 -- outPath <- goldenPath "1" "txt"
 309 -- it "1" do
 310 --  pureGoldenByteStringBuilderFile outPath $
 311 --    mergeFrench
 312 --      & pShowNoColor
 313 --      & Text.Lazy.encodeUtf8
 314 --      & ByteString.Builder.lazyByteString
 315 {-
 316 describe "sentence" do
 317   forM_ sentences \sentence -> do
 318       outPath <- goldenPath sentence "txt"
 319       it sentence do
 320         pureGoldenByteStringBuilderFile outPath $
 321           case sentence & Text.pack & parse patterns of
 322             err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
 323             Right x ->
 324                 x
 325                 & pShowNoColor
 326                 & Text.Lazy.encodeUtf8
 327                 & ByteString.Builder.lazyByteString
 328               {-
 329               x
 330                 & foldMap
 331                   ( \case
 332                       Left c -> [Text.singleton c, "\n"] & mconcat
 333                       Right Pron.Pron{pronInput, pronRule} ->
 334                         [ pronInput & Pron.lexemesChars & Text.pack
 335                         , " → "
 336                         , pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText)
 337                         , "\n"
 338                         ]
 339                           & mconcat
 340                   )
 341                 & Text.encodeUtf8
 342                 & ByteString.Builder.byteString
 343               -}
 344 -}
 345
 346 sentences :: [String]
 347 sentences =
 348   [ "assurance"
 349   , "exercice"
 350   , "nécessaire"
 351   ]
 352
 353 type (:=) a b = (a, b)
 354 data Inp = InpChar Char | InpBorder
 355 ana1 :: [Inp] -> [Inp := [[Inp] := [IPA.Syllable []]]]
 356 ana1 = go
 357   where
 358     go :: [Inp] -> [Inp := [[Inp] := [IPA.Syllable []]]]
 359     go [] = []
 360     go (i@(InpChar 'a') : next) = [i := [[i] := ["a"]]] <> go next
 361
 362 {-
 363 data Dap k a = Dap { dapDefault :: a, dap :: Map k a }
 364 type Phon = [IPA.Syllable []]
 365 type CaseNextString = Dap String CasePrevPhon
 366 type CasePrevPhon = Dap CasePhon CaseNextPhon
 367 type CaseNextPhon = Dap CasePhon Phon
 368 cases :: CaseNextString
 369 cases =
 370   [ "a" := Dap ["a"] []
 371   , "au" := Dap ["o"] []
 372   , "ay" := Dap ["ɛ"] []
 373   , "ai" := Dap ["ɛ"] []
 374   , "c" := Dap []
 375  -- , "ch" :=
 376  -- , "s" :=
 377  -- , "ss" :=
 378   ]
 379
 380 -- a := ["a"]
 381 type Phon = IPA.Syllable []
 382 type AnaMap = ([Char], [Phon])
 383 data Ana = Ana
 384   { anaDecomp :: [AnaMap]
 385   } deriving (Eq, Show)
 386
 387 -- assurance
 388 -- a | a   | a = a
 389 -- as | az | vowel + s = z
 390 -- ass | as | s + s = s
 391 ana_assurance =
 392   Ana [ "a" := ["a."]
 393       , "ss" := ["s"]
 394       , "u" := ["y."]
 395       , "r" := ["ʁ"]
 396       , "an" := ["ɑ̃"]
 397       , "ce" := ["s."]
 398       ]
 399 ana_exercice =
 400   Ana [ "e" := ["ɛ"]
 401       , "x" := ["g", "z"]
 402       , "er" := ["ɛʁ"]
 403       , "cice" := ["sis"]
 404       ]
 405 ana_nécessaire =
 406   Ana [ "né" := ["ne"]
 407       , "ce" := ["sé"]
 408       , "ssaire" := ["sɛʁ"]
 409       ]
 410
 411 buildAna :: String -> Ana
 412 buildAna = List.foldl' (\acc c -> appendAna acc (InpChar c)) (Ana [])
 413
 414 appendAna :: Ana -> Inp -> Ana
 415 appendAna (Ana []) (InpChar c) = case c of
 416   'a' -> Ana [ "a" := ["a"] ]
 417   'b' -> Ana [ "b" := ["be"] ]
 418   'c' -> Ana [ "c" := ["se"] ]
 419   'd' -> Ana [ "d" := ["de"] ]
 420   'e' -> Ana [ "e" := ["ə"] ]
 421   'f' -> Ana [ "f" := ["ɛf"] ]
 422   'g' -> Ana [ "g" := ["g"] ]
 423   'h' -> Ana [ "h" := ["aʃ"] ]
 424   'i' -> Ana [ "i" := ["i"] ]
 425   'j' -> Ana [ "j" := ["ʒi"] ]
 426   'k' -> Ana [ "k" := ["ka"] ]
 427   'l' -> Ana [ "l" := ["ɛl"] ]
 428   'm' -> Ana [ "m" := ["ɛm"] ]
 429   'n' -> Ana [ "n" := ["ɛn"] ]
 430   'o' -> Ana [ "o" := ["o"] ]
 431   'p' -> Ana [ "p" := ["pe"] ]
 432   'q' -> Ana [ "q" := ["ky"] ]
 433   'r' -> Ana [ "r" := ["ɛʁ"] ]
 434   's' -> Ana [ "s" := ["ɛs"] ]
 435   't' -> Ana [ "t" := ["te"] ]
 436   'u' -> Ana [ "u" := ["y"] ]
 437   'v' -> Ana [ "v" := ["ve"] ]
 438   'w' -> Ana [ "w" := ["dubl", "ve"] ]
 439   'x' -> Ana [ "x" := ["ɛks"] ]
 440   'y' -> Ana [ "y" := ["igʁɛk"] ]
 441   'z' -> Ana [ "z" := ["zɛd"] ]
 442   _ -> errorShow c
 443 appendAna
 444   (anaPrevPhonSeg -> Just IPA.Vowel{})
 445   (InpChar 's') = (Ana ["as":=["az"]])
 446 appendAna ana (InpChar 's')
 447   | ana & anaPrevSuffix "s"
 448   = ana & anaPrevMap \case
 449     (List.isSuffixOf "s" -> True) := _ -> "ss" := ["s"]
 450 appendAna x y = errorShow (x, y)
 451
 452 anaPrev (Ana xs) = xs & lastMaybe
 453 anaPrevMap :: (AnaMap -> AnaMap) -> Ana -> Ana
 454 anaPrevMap f (Ana l) =
 455   case l & List.reverse of
 456     x : xs -> Ana $ (f x : xs) & List.reverse
 457     [] -> Ana []
 458
 459 anaPrevSuffix :: [Char] -> Ana -> Bool
 460 anaPrevSuffix suf ana = ana & anaPrev & \case
 461   Just (cs := _) -> cs & List.isSuffixOf suf
 462   _ -> False
 463 anaPrevPhonSeg x = x & anaPrev >>= \case
 464   _ := phon
 465     | Just lastPhon <- phon & lastMaybe
 466     , segs <- lastPhon & IPA.syllableToSegments
 467     , Just lastSeg <- segs & lastMaybe <&> IPA.dropSegmentalFeatures
 468     -> Just lastSeg
 469   _ -> Nothing
 470 data Input = Input
 471   { inputText :: Text
 472   , inputPhonetic :: [IPA.Syllable []]
 473   , inputMeaning :: Maybe ShortText
 474   }
 475 data Pos = PosPrev | PosNext
 476   deriving (Eq, Ord, Show)
 477
 478 data Step = Step
 479   { stepPos :: Pos
 480   , stepPat :: Pat
 481   }
 482   deriving (Eq, Ord, Show)
 483
 484 data PatTree
 485   = PatTreeBranches (Map Step PatTree)
 486   | PatTreeLeaf Pronunciations
 487   deriving (Show)
 488 instance IsList PatTree where
 489   type Item PatTree = (Step, PatTree)
 490   fromList = PatTreeBranches . Map.fromListWith (errorShow)
 491   toList = errorShow
 492
 493 data Action
 494  = Action
 495  { actionRead :: Pat
 496  , actionAfter :: Pat
 497  }
 498
 499 data Parse
 500   = ParseReadChar (Map Char Parse)
 501   | ParsePhon [IPA.Syllable []]
 502   | ParseLook Parse
 503   | ParseLookBorder
 504   | ParseAhead Parse
 505
 506 data Mol = Mol
 507   { molLangues :: Set String
 508   , molRead :: String
 509   , molReading :: String
 510   , molPhoneticPrepend :: Map MolPat [IPA.Syllable []]
 511   }
 512
 513 data MolPat
 514  = MolPatChars String
 515  | MolPatVowel
 516  | MolPatConsonant
 517  | MolPatSemivowel
 518  deriving (Eq, Ord, Show)
 519
 520 data Chunk = Chunk
 521   { chunkLangue :: [String]
 522   , chunkLetters :: String
 523   , chunkPhonetic :: [IPA.Syllable []]
 524   } deriving (Eq, Ord, Show)
 525
 526 mol_french :: [Mol]
 527 mol_french =
 528   [ mol_french_s
 529   ]
 530 mol_french_s = Mol
 531   { molLangues = ["french"]
 532   , molRead = ""
 533   , molReading = "s"
 534   , molPhoneticPrepend =
 535       [ MolPatChars "" := ["ɛs"]
 536       , MolPatChars "s" := ["s"]
 537       ]
 538   }
 539
 540 -- assurances -> a.ssu.rances
 541 --
 542 -- a + s = /s
 543 -- s + s = /s
 544
 545 mergeMol :: Mol -> Mol -> Mol
 546 mergeMol x y = x
 547
 548 french = Chunk
 549   { chunkLangue = ["french"]
 550   , chunkLetters = []
 551   , chunkPhonetic = []
 552   }
 553
 554 -- exposeChars :: [Char]
 555 -- exposePhons :: [IPA.Syllables []]
 556
 557 data Merge = Merge Chunk Chunk Chunk
 558   deriving (Eq, Ord, Show)
 559
 560 runMerge :: [Merge] -> Chunk -> Chunk -> Chunk
 561 runMerge ms x y = x
 562
 563 mergeFrench :: [Merge]
 564 mergeFrench =
 565   [ merge french_a french_n ["ɑ̃"]
 566   ]
 567   where
 568   merge x y chunkPhonetic =
 569     Merge x y
 570     Chunk
 571       { chunkLangue = chunkLangue x
 572       , chunkLetters = chunkLetters x <> chunkLetters y
 573       , chunkPhonetic
 574       }
 575
 576 -- Char + Char =
 577 -- Vowel + Char =
 578 -- Char + Vowel =
 579 -- Consonant + Char =
 580 -- Char + Consonant =
 581
 582 french_a = french { chunkLetters = "a", chunkPhonetic = ["a"] }
 583 french_b = french { chunkLetters = "b", chunkPhonetic = ["be"] }
 584 french_c = french { chunkLetters = "c", chunkPhonetic = ["se"] }
 585 french_d = french { chunkLetters = "d", chunkPhonetic = ["de"] }
 586 french_e = french { chunkLetters = "e", chunkPhonetic = ["ə"] }
 587 french_f = french { chunkLetters = "f", chunkPhonetic = ["ɛf"] }
 588 french_g = french { chunkLetters = "g", chunkPhonetic = ["g"] }
 589 french_h = french { chunkLetters = "h", chunkPhonetic = ["aʃ"] }
 590 french_i = french { chunkLetters = "i", chunkPhonetic = ["i"] }
 591 french_j = french { chunkLetters = "j", chunkPhonetic = ["ʒi"] }
 592 french_k = french { chunkLetters = "k", chunkPhonetic = ["ka"] }
 593 french_l = french { chunkLetters = "l", chunkPhonetic = ["ɛl"] }
 594 french_m = french { chunkLetters = "m", chunkPhonetic = ["ɛm"] }
 595 french_n = french { chunkLetters = "n", chunkPhonetic = ["ɛn"] }
 596 french_o = french { chunkLetters = "o", chunkPhonetic = ["o"] }
 597 french_p = french { chunkLetters = "p", chunkPhonetic = ["pe"] }
 598 french_q = french { chunkLetters = "q", chunkPhonetic = ["ky"] }
 599 french_r = french { chunkLetters = "r", chunkPhonetic = ["ɛʁ"] }
 600 french_s = french { chunkLetters = "s", chunkPhonetic = ["ɛs"] }
 601 french_t = french { chunkLetters = "t", chunkPhonetic = ["te"] }
 602 french_u = french { chunkLetters = "u", chunkPhonetic = ["y"] }
 603 french_v = french { chunkLetters = "v", chunkPhonetic = ["ve"] }
 604 french_w = french { chunkLetters = "w", chunkPhonetic = ["dubl", "ve"] }
 605 french_x = french { chunkLetters = "x", chunkPhonetic = ["ɛks"] }
 606 french_y = french { chunkLetters = "y", chunkPhonetic = ["igʁɛk"] }
 607 french_z = french { chunkLetters = "z", chunkPhonetic = ["zɛd"] }
 608
 609 type Patterns = Map Step PatTree
 610
 611 patterns :: Map Step PatTree
 612 patterns =
 613   [ Step PosNext (PatChar 'a') :=
 614       [ Step PosNext PatLexicalBorder :=
 615           PatTreeLeaf ["a" := "ə"]
 616       ]
 617   , Step PosNext (PatChar 't') :=
 618       [ Step PosNext (PatChar 'h') :=
 619           [ Step PosNext (PatChar 'e') :=
 620               [ Step PosNext (PatLexicalCategory Char.Space) :=
 621                   PatTreeLeaf ["the" := "zi"]
 622               ]
 623           ]
 624       ]
 625   ]
 626
 627 data State = State
 628   { stateInput :: LZ.Zipper Inp
 629   , stateBuffer :: [Step]
 630   , statePats :: Patterns
 631   , statePatReset :: Bool
 632   }
 633
 634 data Pat
 635   = PatChar Char
 636   | PatLexicalCategory Char.GeneralCategory
 637   | PatLexicalBorder
 638   | PatPhoneticVowel
 639   | PatPhoneticSemiVowel
 640   | PatPhoneticConsonant
 641   deriving (Eq, Ord, Show)
 642
 643 data Inp = Inp
 644   { inpSteps :: [Step]
 645   , inpPronunciations :: Pronunciations
 646   }
 647   deriving (Show)
 648
 649 parse :: Patterns -> Text -> Either String [Inp]
 650 parse initPats input =
 651   loop
 652     State
 653       { stateInput = input & Text.unpack & fmap charToInp & LZ.fromList
 654       , stateBuffer = []
 655       , statePats = initPats
 656       , statePatReset = True
 657       }
 658     & stateInput
 659     & LZ.toList
 660     & Right
 661   where
 662     charToInp :: Char -> Inp
 663     charToInp c =
 664       Inp
 665         { inpSteps = [Step { stepPos = PosNext, stepPat = PatChar c }]
 666         , inpPronunciations = []
 667         }
 668     loop :: State -> State
 669     loop st =
 670       [ look (key, val) st
 671       | (key, val) <- st & statePats & Map.toList
 672       ]
 673         & catMaybes
 674         & headMaybe
 675         & fromMaybe (loop st{statePats = initPats})
 676     look :: (Step, PatTree) -> State -> Maybe State
 677     look kv@(key, val) st =
 678       case key of
 679         Step{stepPos = PosPrev, stepPat = patPrev} -> errorShow ("prev" :: Text)
 680         Step{stepPos = PosNext, stepPat = patNext} ->
 681           case patNext of
 682             PatLexicalBorder
 683               | stateInput st & LZ.endp -> match kv st
 684             -- PatChar c
 685             --  | Just inpNext <- stateInput st & LZ.safeCursor ->
 686             --    inpNext & inpSteps &
 687             --  case  of
 688             --    Nothing ->
 689             _ -> Nothing
 690     match kv@(key, val) st =
 691       case val of
 692         PatTreeLeaf pron ->
 693           Just
 694             st
 695               { statePats = initPats
 696               , stateBuffer = []
 697               , stateInput =
 698                   stateInput st
 699                     & LZ.insert
 700                       Inp
 701                         { inpSteps = stateBuffer st & (key :) & List.reverse
 702                         , inpPronunciations = pron
 703                         }
 704               }
 705         PatTreeBranches pats ->
 706           Just
 707             st
 708               { statePats = pats
 709               , stateBuffer = key : stateBuffer st
 710               }
 711 -}