]> Git — Sourcephile - julm/worksheets.git/blob - tests/Utils/Pronunciation.hs
wip
[julm/worksheets.git] / tests / Utils / Pronunciation.hs
1 {-# LANGUAGE OverloadedLists #-}
2
3 module Utils.Pronunciation where
4
5 import Control.Monad.Trans.Class qualified as MT
6 import Control.Monad.Trans.State qualified as MT
7 import Data.ByteString.Builder qualified as ByteString.Builder
8 import Data.Char qualified as Char
9 import Data.GenValidity.Map ()
10 import Data.GenValidity.Sequence ()
11 import Data.GenValidity.Set ()
12 import Data.GenValidity.Text ()
13 import Data.List qualified as List
14 import Data.List.Zipper qualified as LZ
15 import Data.Map.Strict qualified as Map
16 import Data.Text qualified as Text
17 import Data.Text.Encoding qualified as Text
18 import Data.Text.Lazy qualified as Text.Lazy
19 import Data.Text.Lazy.Encoding qualified as Text.Lazy
20 import Data.Text.Short qualified as TextShort
21 import Data.Validity.Map ()
22 import Data.Validity.Set ()
23 import Data.Validity.Text ()
24 import Language.Pronunciation (Pronunciation (..), Pronunciations (..))
25 import Language.Pronunciation qualified as Pron
26 import Test.Syd
27 import Utils.Tests
28 import Worksheets.Utils.HTML (Html)
29 import Worksheets.Utils.HTML qualified as HTML
30 import Worksheets.Utils.IPA qualified as IPA
31 import Worksheets.Utils.Prelude
32
33 cardsHtml :: String -> IO Html -> TestDefM (outers) () ()
34 cardsHtml title html = do
35 outPath <- goldenPath title "html"
36 builder <- html <&> HTML.renderMarkupBuilder & liftIO
37 it title do
38 goldenByteStringBuilderFile outPath (return builder)
39
40 lexerRender rules = do
41 let title = "lexer"
42 outPath <- goldenPath title "html"
43 builder <- rules & Pron.tableHtml <&> HTML.renderMarkupBuilder & liftIO
44 it title do
45 goldenByteStringBuilderFile outPath (return builder)
46
47 pronunciationDecompositionTest :: Pron.Table -> String -> TestDefM (outers) () ()
48 pronunciationDecompositionTest rules sentence = do
49 outPath <- goldenPath sentence "txt"
50 it sentence do
51 pureGoldenByteStringBuilderFile outPath $
52 case sentence & Text.pack & Pron.run rules of
53 Right x ->
54 x
55 & foldMap
56 ( \case
57 Left c -> [Text.singleton c, "\n"] & mconcat
58 Right Pron.Pron{pronInput, pronRule} ->
59 [ pronInput & Pron.lexemesChars & Text.pack
60 , " → "
61 , pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText)
62 , "\n"
63 ]
64 & mconcat
65 )
66 & Text.encodeUtf8
67 & ByteString.Builder.byteString
68 err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
69
70 {-
71 pronunciationResultTest :: Pron.Table -> String -> TestDefM (outers) () ()
72 pronunciationResultTest rules sentence = do
73 outPath <- goldenPath sentence "txt"
74 it sentence do
75 pureGoldenByteStringBuilderFile outPath $
76 case sentence & Text.pack & Pron.run rules of
77 Right x ->
78 x & foldMap \Pron.Pron{pronInput, pronRule} ->
79 pronInput
80 & Text.encodeUtf8
81 & ByteString.Builder.byteString
82 err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
83
84 -}
85 pronunciationParserTest ::
86 Pron.Table ->
87 Map Pron.InputLexemes Pron.Pronunciation ->
88 TestDefM (outers) () ()
89 pronunciationParserTest rules sentences = do
90 forM_ (sentences & Map.toList) \(inp, exp :: Pron.Pronunciation) -> do
91 it (inp & Pron.unInputLexemes & Pron.lexemesChars & show) do
92 let got :: Pron.Pronunciation =
93 inp
94 & Pron.unInputLexemes
95 & Pron.runParser rules
96 & either errorShow id
97 & foldMap
98 ( either
99 ( \c ->
100 if Char.isLetter c
101 then errorShow ("missing phonetic for" :: Text, c, "in" :: Text, inp)
102 else
103 Pron.Pronunciations
104 [ "" :=
105 Pron.Pronunciation
106 { pronunciationIPABroad = []
107 , pronunciationText = Text.singleton c
108 }
109 ]
110 )
111 (Pron.pronRule >>> Pron.rulePron)
112 )
113 & Pron.joinPronunciations
114 & Pron.unPronunciations
115 & List.head
116 & snd
117 Pron.pronunciationText got
118 `shouldBe` Pron.pronunciationText exp
119
120 {-
121 let baseName =
122 sentence
123 & foldMap \Pron.ExampleLiteral{..} ->
124 [ [TextShort.unpack exampleLiteralText]
125 , if TextShort.null exampleLiteralMeaning
126 then []
127 else ["{", exampleLiteralMeaning & TextShort.unpack, "}"]
128 ]
129 & mconcat
130 & mconcat
131 outPath <- goldenPath baseName "txt"
132 it baseName do
133 pureGoldenByteStringBuilderFile outPath $
134 case sentence & Pron.parseLiterals rules of
135 Right x ->
136 x
137 & foldMap
138 ( \case
139 Left c -> [Text.singleton c, "\n"] & mconcat
140 Right Pron.Pron{pronInput, pronRule} ->
141 -- Chunk{chunkInterval = trans, chunkMachine = Pron.Machine{machinePron = pron}}
142 [ pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText)
143 , " ← "
144 , show pronInput & Text.pack
145 , "\n"
146 ]
147 & mconcat
148 )
149 & Text.encodeUtf8
150 & ByteString.Builder.byteString
151 err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
152 -}
153
154 data Lex = LexChar Char | LexBorder
155 deriving (Eq, Show)
156
157 data State = State
158 { statePrev :: [(Lex, [Char])]
159 , stateNext :: [Lex]
160 }
161 deriving (Show)
162
163 readFrench :: [Char] -> [(Lex, [Char])]
164 readFrench inp = go State{statePrev = [], stateNext = (LexChar <$> inp) <> [LexBorder]}
165 where
166 goMany fts State{statePrev, stateNext = n : next}
167 | let stack = n : (statePrev <&> fst)
168 , stack & List.isPrefixOf (fts <&> fst & List.reverse) =
169 -- \|| traceShow (stack, fts) False
170 Just State{statePrev = List.reverse fts <> List.drop (List.length fts - 1) statePrev, stateNext = next}
171 goMany _ _ = Nothing
172
173 go State{statePrev, stateNext = []} = statePrev & List.reverse
174 go (goMany [LexChar 'C' := "s", LexChar 'E' := "ə"] -> Just st) = go st
175 go (goMany [LexChar 'C' := "", LexChar 'H' := "ʃ"] -> Just st) = go st
176 go (goMany [LexChar 'S' := "", LexChar 'S' := "s"] -> Just st) = go st
177 go (goMany [LexChar 'A' := "", LexChar 'N' := "ɑ̃"] -> Just st) = go st
178 go (goMany [LexChar 'A' := "", LexChar 'U' := "o"] -> Just st) = go st
179 go (goMany [LexChar 'E' := "", LexChar 'I' := "ɛ"] -> Just st) = go st
180 go
181 ( goMany
182 [ LexChar 'E' := ""
183 , LexChar 'I' := "ɛ"
184 , LexChar 'L' := ""
185 , LexChar 'L' := ""
186 , LexChar 'E' := "j"
187 , LexBorder := ""
188 ] ->
189 Just st
190 ) = go st
191 go (goMany [LexChar 'T' := "", LexChar 'T' := "t"] -> Just st) = go st
192 go (goMany [LexChar 'E' := "", LexChar 'T' := "t"] -> Just st) = go st
193 go (goMany [LexChar 'E' := "", LexBorder := ""] -> Just st) = go st
194 go (goMany [LexBorder := ""] -> Just st) = go st
195 go (goMany [LexChar 'A' := "a"] -> Just st) = go st
196 go (goMany [LexChar 'B' := "b"] -> Just st) = go st
197 go (goMany [LexChar 'C' := "k"] -> Just st) = go st
198 go (goMany [LexChar 'H' := ""] -> Just st) = go st
199 go (goMany [LexChar 'I' := "i"] -> Just st) = go st
200 go (goMany [LexChar 'L' := "l"] -> Just st) = go st
201 go (goMany [LexChar 'O' := "ɔ"] -> Just st) = go st
202 go (goMany [LexChar 'T' := "t"] -> Just st) = go st
203 go (goMany [LexChar 'N' := "n"] -> Just st) = go st
204 go (goMany [LexChar 'R' := "ʁ"] -> Just st) = go st
205 go (goMany [LexChar 'S' := "s"] -> Just st) = go st
206 go (goMany [LexChar 'E' := "ə"] -> Just st) = go st
207 go (goMany [LexChar 'É' := "e"] -> Just st) = go st
208 go (goMany [LexChar 'U' := "y"] -> Just st) = go st
209 go (goMany [LexChar 'V' := "v"] -> Just st) = go st
210 go st = errorShow st
211
212 {-
213 go (('C' := _):prev) (n@'E':next) = go ((n := "ə"):('C' := "s"):prev) next
214 go (('S' := _):prev) ('S':next) = go (('S' := ""):('S' := "s"):prev) next
215 go (((isVowel->True) := _):prev) ('S':next@((isVowel->True):_)) = go (('S' := "z"):prev) next
216 go prev next = errorShow (prev, next)
217 -}
218
219 isConsonant = isVowel >>> not
220 isVowel 'A' = True
221 isVowel 'E' = True
222 isVowel 'É' = True
223 isVowel 'È' = True
224 isVowel 'Ê' = True
225 isVowel 'I' = True
226 isVowel 'O' = True
227 isVowel 'U' = True
228 isVowel 'Y' = True
229 isVowel _ = False
230
231 spec = do
232 describe "Pronunciation" do
233 describe "test" do
234 it "ABEILLE" do
235 readFrench "ABEILLE"
236 `shouldBe` [ LexChar 'A' := "a"
237 , LexChar 'B' := "b"
238 , LexChar 'E' := ""
239 , LexChar 'I' := "ɛ"
240 , LexChar 'L' := ""
241 , LexChar 'L' := ""
242 , LexChar 'E' := "j"
243 , LexBorder := ""
244 ]
245 it "ASSURANCE" do
246 readFrench "ASSURANCE"
247 `shouldBe` [ LexChar 'A' := "a"
248 , LexChar 'S' := ""
249 , LexChar 'S' := "s"
250 , LexChar 'U' := "y"
251 , LexChar 'R' := "ʁ"
252 , LexChar 'A' := ""
253 , LexChar 'N' := "ɑ̃"
254 , LexChar 'C' := "s"
255 , LexChar 'E' := ""
256 , LexBorder := ""
257 ]
258 it "CHEVAUCHE" do
259 readFrench "CHEVAUCHE"
260 `shouldBe` [ LexChar 'C' := ""
261 , LexChar 'H' := "ʃ"
262 , LexChar 'E' := "ə"
263 , LexChar 'V' := "v"
264 , LexChar 'A' := ""
265 , LexChar 'U' := "o"
266 , LexChar 'C' := ""
267 , LexChar 'H' := "ʃ"
268 , LexChar 'E' := ""
269 , LexBorder := ""
270 ]
271 it "NÉCESSAIRE" do
272 readFrench "NÉCESSAIRE"
273 `shouldBe` [ LexChar 'N' := "n"
274 , LexChar 'É' := "e"
275 , LexChar 'C' := "s"
276 , LexChar 'E' := "ə"
277 , LexChar 'S' := ""
278 , LexChar 'S' := "s"
279 , LexChar 'A' := "a"
280 , LexChar 'I' := "i"
281 , LexChar 'R' := "ʁ"
282 , LexChar 'E' := ""
283 , LexBorder := ""
284 ]
285
286 -- () `shouldBe` ()
287 -- buildAna "a" `shouldBe` Ana [ "a" := ["a"] ]
288 -- buildAna "as" `shouldBe` Ana [ "as" := ["az"] ]
289 -- buildAna "ass" `shouldBe` Ana [ "ass" := ["as"] ]
290 it "CAROTTE" do
291 readFrench "CAROTTE"
292 `shouldBe` [ LexChar 'C' := "k"
293 , LexChar 'A' := "a"
294 , LexChar 'R' := "ʁ"
295 , LexChar 'O' := "ɔ"
296 , LexChar 'T' := ""
297 , LexChar 'T' := "t"
298 , LexChar 'E' := ""
299 , LexBorder := ""
300 ]
301
302 -- () `shouldBe` ()
303 -- buildAna "a" `shouldBe` Ana [ "a" := ["a"] ]
304 -- buildAna "as" `shouldBe` Ana [ "as" := ["az"] ]
305 -- buildAna "ass" `shouldBe` Ana [ "ass" := ["as"] ]
306 -- it "assurance" do
307 -- ana_assurance `shouldBe` ana_assurance
308 -- outPath <- goldenPath "1" "txt"
309 -- it "1" do
310 -- pureGoldenByteStringBuilderFile outPath $
311 -- mergeFrench
312 -- & pShowNoColor
313 -- & Text.Lazy.encodeUtf8
314 -- & ByteString.Builder.lazyByteString
315 {-
316 describe "sentence" do
317 forM_ sentences \sentence -> do
318 outPath <- goldenPath sentence "txt"
319 it sentence do
320 pureGoldenByteStringBuilderFile outPath $
321 case sentence & Text.pack & parse patterns of
322 err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
323 Right x ->
324 x
325 & pShowNoColor
326 & Text.Lazy.encodeUtf8
327 & ByteString.Builder.lazyByteString
328 {-
329 x
330 & foldMap
331 ( \case
332 Left c -> [Text.singleton c, "\n"] & mconcat
333 Right Pron.Pron{pronInput, pronRule} ->
334 [ pronInput & Pron.lexemesChars & Text.pack
335 , " → "
336 , pronRule & Pron.rulePron & Pron.unPronunciations & foldMap (snd >>> Pron.pronunciationText)
337 , "\n"
338 ]
339 & mconcat
340 )
341 & Text.encodeUtf8
342 & ByteString.Builder.byteString
343 -}
344 -}
345
346 sentences :: [String]
347 sentences =
348 [ "assurance"
349 , "exercice"
350 , "nécessaire"
351 ]
352
353 type (:=) a b = (a, b)
354 data Inp = InpChar Char | InpBorder
355 ana1 :: [Inp] -> [Inp := [[Inp] := [IPA.Syllable []]]]
356 ana1 = go
357 where
358 go :: [Inp] -> [Inp := [[Inp] := [IPA.Syllable []]]]
359 go [] = []
360 go (i@(InpChar 'a') : next) = [i := [[i] := ["a"]]] <> go next
361
362 {-
363 data Dap k a = Dap { dapDefault :: a, dap :: Map k a }
364 type Phon = [IPA.Syllable []]
365 type CaseNextString = Dap String CasePrevPhon
366 type CasePrevPhon = Dap CasePhon CaseNextPhon
367 type CaseNextPhon = Dap CasePhon Phon
368 cases :: CaseNextString
369 cases =
370 [ "a" := Dap ["a"] []
371 , "au" := Dap ["o"] []
372 , "ay" := Dap ["ɛ"] []
373 , "ai" := Dap ["ɛ"] []
374 , "c" := Dap []
375 -- , "ch" :=
376 -- , "s" :=
377 -- , "ss" :=
378 ]
379
380 -- a := ["a"]
381 type Phon = IPA.Syllable []
382 type AnaMap = ([Char], [Phon])
383 data Ana = Ana
384 { anaDecomp :: [AnaMap]
385 } deriving (Eq, Show)
386
387 -- assurance
388 -- a | a | a = a
389 -- as | az | vowel + s = z
390 -- ass | as | s + s = s
391 ana_assurance =
392 Ana [ "a" := ["a."]
393 , "ss" := ["s"]
394 , "u" := ["y."]
395 , "r" := ["ʁ"]
396 , "an" := ["ɑ̃"]
397 , "ce" := ["s."]
398 ]
399 ana_exercice =
400 Ana [ "e" := ["ɛ"]
401 , "x" := ["g", "z"]
402 , "er" := ["ɛʁ"]
403 , "cice" := ["sis"]
404 ]
405 ana_nécessaire =
406 Ana [ "né" := ["ne"]
407 , "ce" := ["sé"]
408 , "ssaire" := ["sɛʁ"]
409 ]
410
411 buildAna :: String -> Ana
412 buildAna = List.foldl' (\acc c -> appendAna acc (InpChar c)) (Ana [])
413
414 appendAna :: Ana -> Inp -> Ana
415 appendAna (Ana []) (InpChar c) = case c of
416 'a' -> Ana [ "a" := ["a"] ]
417 'b' -> Ana [ "b" := ["be"] ]
418 'c' -> Ana [ "c" := ["se"] ]
419 'd' -> Ana [ "d" := ["de"] ]
420 'e' -> Ana [ "e" := ["ə"] ]
421 'f' -> Ana [ "f" := ["ɛf"] ]
422 'g' -> Ana [ "g" := ["g"] ]
423 'h' -> Ana [ "h" := ["aʃ"] ]
424 'i' -> Ana [ "i" := ["i"] ]
425 'j' -> Ana [ "j" := ["ʒi"] ]
426 'k' -> Ana [ "k" := ["ka"] ]
427 'l' -> Ana [ "l" := ["ɛl"] ]
428 'm' -> Ana [ "m" := ["ɛm"] ]
429 'n' -> Ana [ "n" := ["ɛn"] ]
430 'o' -> Ana [ "o" := ["o"] ]
431 'p' -> Ana [ "p" := ["pe"] ]
432 'q' -> Ana [ "q" := ["ky"] ]
433 'r' -> Ana [ "r" := ["ɛʁ"] ]
434 's' -> Ana [ "s" := ["ɛs"] ]
435 't' -> Ana [ "t" := ["te"] ]
436 'u' -> Ana [ "u" := ["y"] ]
437 'v' -> Ana [ "v" := ["ve"] ]
438 'w' -> Ana [ "w" := ["dubl", "ve"] ]
439 'x' -> Ana [ "x" := ["ɛks"] ]
440 'y' -> Ana [ "y" := ["igʁɛk"] ]
441 'z' -> Ana [ "z" := ["zɛd"] ]
442 _ -> errorShow c
443 appendAna
444 (anaPrevPhonSeg -> Just IPA.Vowel{})
445 (InpChar 's') = (Ana ["as":=["az"]])
446 appendAna ana (InpChar 's')
447 | ana & anaPrevSuffix "s"
448 = ana & anaPrevMap \case
449 (List.isSuffixOf "s" -> True) := _ -> "ss" := ["s"]
450 appendAna x y = errorShow (x, y)
451
452 anaPrev (Ana xs) = xs & lastMaybe
453 anaPrevMap :: (AnaMap -> AnaMap) -> Ana -> Ana
454 anaPrevMap f (Ana l) =
455 case l & List.reverse of
456 x : xs -> Ana $ (f x : xs) & List.reverse
457 [] -> Ana []
458
459 anaPrevSuffix :: [Char] -> Ana -> Bool
460 anaPrevSuffix suf ana = ana & anaPrev & \case
461 Just (cs := _) -> cs & List.isSuffixOf suf
462 _ -> False
463 anaPrevPhonSeg x = x & anaPrev >>= \case
464 _ := phon
465 | Just lastPhon <- phon & lastMaybe
466 , segs <- lastPhon & IPA.syllableToSegments
467 , Just lastSeg <- segs & lastMaybe <&> IPA.dropSegmentalFeatures
468 -> Just lastSeg
469 _ -> Nothing
470 data Input = Input
471 { inputText :: Text
472 , inputPhonetic :: [IPA.Syllable []]
473 , inputMeaning :: Maybe ShortText
474 }
475 data Pos = PosPrev | PosNext
476 deriving (Eq, Ord, Show)
477
478 data Step = Step
479 { stepPos :: Pos
480 , stepPat :: Pat
481 }
482 deriving (Eq, Ord, Show)
483
484 data PatTree
485 = PatTreeBranches (Map Step PatTree)
486 | PatTreeLeaf Pronunciations
487 deriving (Show)
488 instance IsList PatTree where
489 type Item PatTree = (Step, PatTree)
490 fromList = PatTreeBranches . Map.fromListWith (errorShow)
491 toList = errorShow
492
493 data Action
494 = Action
495 { actionRead :: Pat
496 , actionAfter :: Pat
497 }
498
499 data Parse
500 = ParseReadChar (Map Char Parse)
501 | ParsePhon [IPA.Syllable []]
502 | ParseLook Parse
503 | ParseLookBorder
504 | ParseAhead Parse
505
506 data Mol = Mol
507 { molLangues :: Set String
508 , molRead :: String
509 , molReading :: String
510 , molPhoneticPrepend :: Map MolPat [IPA.Syllable []]
511 }
512
513 data MolPat
514 = MolPatChars String
515 | MolPatVowel
516 | MolPatConsonant
517 | MolPatSemivowel
518 deriving (Eq, Ord, Show)
519
520 data Chunk = Chunk
521 { chunkLangue :: [String]
522 , chunkLetters :: String
523 , chunkPhonetic :: [IPA.Syllable []]
524 } deriving (Eq, Ord, Show)
525
526 mol_french :: [Mol]
527 mol_french =
528 [ mol_french_s
529 ]
530 mol_french_s = Mol
531 { molLangues = ["french"]
532 , molRead = ""
533 , molReading = "s"
534 , molPhoneticPrepend =
535 [ MolPatChars "" := ["ɛs"]
536 , MolPatChars "s" := ["s"]
537 ]
538 }
539
540 -- assurances -> a.ssu.rances
541 --
542 -- a + s = /s
543 -- s + s = /s
544
545 mergeMol :: Mol -> Mol -> Mol
546 mergeMol x y = x
547
548 french = Chunk
549 { chunkLangue = ["french"]
550 , chunkLetters = []
551 , chunkPhonetic = []
552 }
553
554 -- exposeChars :: [Char]
555 -- exposePhons :: [IPA.Syllables []]
556
557 data Merge = Merge Chunk Chunk Chunk
558 deriving (Eq, Ord, Show)
559
560 runMerge :: [Merge] -> Chunk -> Chunk -> Chunk
561 runMerge ms x y = x
562
563 mergeFrench :: [Merge]
564 mergeFrench =
565 [ merge french_a french_n ["ɑ̃"]
566 ]
567 where
568 merge x y chunkPhonetic =
569 Merge x y
570 Chunk
571 { chunkLangue = chunkLangue x
572 , chunkLetters = chunkLetters x <> chunkLetters y
573 , chunkPhonetic
574 }
575
576 -- Char + Char =
577 -- Vowel + Char =
578 -- Char + Vowel =
579 -- Consonant + Char =
580 -- Char + Consonant =
581
582 french_a = french { chunkLetters = "a", chunkPhonetic = ["a"] }
583 french_b = french { chunkLetters = "b", chunkPhonetic = ["be"] }
584 french_c = french { chunkLetters = "c", chunkPhonetic = ["se"] }
585 french_d = french { chunkLetters = "d", chunkPhonetic = ["de"] }
586 french_e = french { chunkLetters = "e", chunkPhonetic = ["ə"] }
587 french_f = french { chunkLetters = "f", chunkPhonetic = ["ɛf"] }
588 french_g = french { chunkLetters = "g", chunkPhonetic = ["g"] }
589 french_h = french { chunkLetters = "h", chunkPhonetic = ["aʃ"] }
590 french_i = french { chunkLetters = "i", chunkPhonetic = ["i"] }
591 french_j = french { chunkLetters = "j", chunkPhonetic = ["ʒi"] }
592 french_k = french { chunkLetters = "k", chunkPhonetic = ["ka"] }
593 french_l = french { chunkLetters = "l", chunkPhonetic = ["ɛl"] }
594 french_m = french { chunkLetters = "m", chunkPhonetic = ["ɛm"] }
595 french_n = french { chunkLetters = "n", chunkPhonetic = ["ɛn"] }
596 french_o = french { chunkLetters = "o", chunkPhonetic = ["o"] }
597 french_p = french { chunkLetters = "p", chunkPhonetic = ["pe"] }
598 french_q = french { chunkLetters = "q", chunkPhonetic = ["ky"] }
599 french_r = french { chunkLetters = "r", chunkPhonetic = ["ɛʁ"] }
600 french_s = french { chunkLetters = "s", chunkPhonetic = ["ɛs"] }
601 french_t = french { chunkLetters = "t", chunkPhonetic = ["te"] }
602 french_u = french { chunkLetters = "u", chunkPhonetic = ["y"] }
603 french_v = french { chunkLetters = "v", chunkPhonetic = ["ve"] }
604 french_w = french { chunkLetters = "w", chunkPhonetic = ["dubl", "ve"] }
605 french_x = french { chunkLetters = "x", chunkPhonetic = ["ɛks"] }
606 french_y = french { chunkLetters = "y", chunkPhonetic = ["igʁɛk"] }
607 french_z = french { chunkLetters = "z", chunkPhonetic = ["zɛd"] }
608
609 type Patterns = Map Step PatTree
610
611 patterns :: Map Step PatTree
612 patterns =
613 [ Step PosNext (PatChar 'a') :=
614 [ Step PosNext PatLexicalBorder :=
615 PatTreeLeaf ["a" := "ə"]
616 ]
617 , Step PosNext (PatChar 't') :=
618 [ Step PosNext (PatChar 'h') :=
619 [ Step PosNext (PatChar 'e') :=
620 [ Step PosNext (PatLexicalCategory Char.Space) :=
621 PatTreeLeaf ["the" := "zi"]
622 ]
623 ]
624 ]
625 ]
626
627 data State = State
628 { stateInput :: LZ.Zipper Inp
629 , stateBuffer :: [Step]
630 , statePats :: Patterns
631 , statePatReset :: Bool
632 }
633
634 data Pat
635 = PatChar Char
636 | PatLexicalCategory Char.GeneralCategory
637 | PatLexicalBorder
638 | PatPhoneticVowel
639 | PatPhoneticSemiVowel
640 | PatPhoneticConsonant
641 deriving (Eq, Ord, Show)
642
643 data Inp = Inp
644 { inpSteps :: [Step]
645 , inpPronunciations :: Pronunciations
646 }
647 deriving (Show)
648
649 parse :: Patterns -> Text -> Either String [Inp]
650 parse initPats input =
651 loop
652 State
653 { stateInput = input & Text.unpack & fmap charToInp & LZ.fromList
654 , stateBuffer = []
655 , statePats = initPats
656 , statePatReset = True
657 }
658 & stateInput
659 & LZ.toList
660 & Right
661 where
662 charToInp :: Char -> Inp
663 charToInp c =
664 Inp
665 { inpSteps = [Step { stepPos = PosNext, stepPat = PatChar c }]
666 , inpPronunciations = []
667 }
668 loop :: State -> State
669 loop st =
670 [ look (key, val) st
671 | (key, val) <- st & statePats & Map.toList
672 ]
673 & catMaybes
674 & headMaybe
675 & fromMaybe (loop st{statePats = initPats})
676 look :: (Step, PatTree) -> State -> Maybe State
677 look kv@(key, val) st =
678 case key of
679 Step{stepPos = PosPrev, stepPat = patPrev} -> errorShow ("prev" :: Text)
680 Step{stepPos = PosNext, stepPat = patNext} ->
681 case patNext of
682 PatLexicalBorder
683 | stateInput st & LZ.endp -> match kv st
684 -- PatChar c
685 -- | Just inpNext <- stateInput st & LZ.safeCursor ->
686 -- inpNext & inpSteps &
687 -- case of
688 -- Nothing ->
689 _ -> Nothing
690 match kv@(key, val) st =
691 case val of
692 PatTreeLeaf pron ->
693 Just
694 st
695 { statePats = initPats
696 , stateBuffer = []
697 , stateInput =
698 stateInput st
699 & LZ.insert
700 Inp
701 { inpSteps = stateBuffer st & (key :) & List.reverse
702 , inpPronunciations = pron
703 }
704 }
705 PatTreeBranches pats ->
706 Just
707 st
708 { statePats = pats
709 , stateBuffer = key : stateBuffer st
710 }
711 -}