[julm/worksheets.git] / src/Language/Pronunciation.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5
6 module Language.Pronunciation where
7
8 import Control.Applicative (Alternative (..))
9 import Control.Monad.Combinators qualified as P
10 import Control.Monad.Trans.Class qualified as MT
11 import Control.Monad.Trans.State qualified as MT
12 import Data.List qualified as List
13 import Data.List.Zipper qualified as LZ
14 import Data.Map.Strict qualified as Map
15 import Data.Set qualified as Set
16 import Data.Text qualified as Text
17 import Data.Text.Short qualified as TextShort
18 import Data.Traversable (traverse)
19 import Language
20 import Paths_worksheets qualified as Self
21 import System.FilePath.Posix ((</>))
22 import System.FilePath.Posix qualified as File
23 import Text.Blaze.Html5.Attributes qualified as HA
24 import Text.Megaparsec qualified as P
25 import Worksheets.Utils.Char qualified as Char
26 import Worksheets.Utils.HTML (className, classes, styles, (!))
27 import Worksheets.Utils.HTML qualified as HTML
28 import Worksheets.Utils.IPA qualified as IPA
29 import Worksheets.Utils.Paper qualified as Paper
30 import Worksheets.Utils.Prelude
31
32 data Pronunciation = Pronunciation
33 { pronunciationIPABroad :: [IPA.Syllable []]
34 , pronunciationText :: Text
35 }
36 deriving (Eq, Ord, Show)
37 instance Semigroup Pronunciation where
38 x <> y =
39 Pronunciation
40 { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
41 , pronunciationText =
42 [pronunciationText x, pronunciationText y]
43 & List.filter (/= "")
44 & Text.intercalate "."
45 }
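-- A rough illustration of the append above (the IPA literals are only
-- examples, relying on the IsString instance further down): the broad
-- syllables are concatenated and the rendered texts joined with a ".",
-- so ("ɛg" <> "zɛʁ") should carry pronunciationText "ɛg.zɛʁ".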
46 instance IsList Pronunciation where
47 type Item Pronunciation = IPA.Syllable []
48 toList = pronunciationIPABroad
49
50 -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
51 fromList l =
52 Pronunciation
53 { pronunciationIPABroad = ipa
54 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
55 }
56 where
57 ipa =
58 l
59 & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
60 & fromList
61 instance IsString Pronunciation where
62 fromString s =
63 Pronunciation
64 { pronunciationIPABroad = ipa
65 , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
66 }
67 where
68 ipa =
69 s
70 & Text.pack
71 & IPA.parseSyllables @[]
72 & either errorShow id
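-- With OverloadedStrings this lets a Pronunciation be written directly as
-- an IPA string literal, e.g. "ɛg.zɛʁ.sis" (illustrative; the accepted
-- syntax is whatever IPA.parseSyllables supports), failing via errorShow
-- on a parse error.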
73
74 {-
75 data PronunciationKey = PronunciationKey
76 { pronunciationKeyText :: Text
77 , pronunciationKeyPron :: Pronunciation
78 , pronunciationKey
79 }
80 -}
81
82 newtype Pronunciations = Pronunciations
83 { unPronunciations :: [(RuleLexemes, Pronunciation)]
84 }
85 deriving (Eq, Ord)
86 deriving newtype (Show)
87 deriving newtype (Semigroup)
88 deriving newtype (Monoid)
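-- | Merge per-lexeme pronunciations into a single one: the rule inputs
-- are concatenated and their broad IPA is re-syllabified by the fold
-- below, gluing syllable fragments back together across rule boundaries
-- and marking every syllable but the last with an IPA.Break.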
89 joinPronunciations :: Pronunciations -> Pronunciations
90 joinPronunciations (Pronunciations ps) =
91 Pronunciations
92 [ input :=
93 Pronunciation
94 { pronunciationIPABroad = ipa
95 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
96 }
97 ]
98 where
99 input :: RuleLexemes
100 ipa :: [IPA.Syllable []]
101 (input, ipa) =
102 ps
103 & List.foldr
104 ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
105 case pronunciationIPABroad of
106 [] -> (inp <> suffix, IPA.Syllable [], acc)
107 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
108 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
109 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
110 _ -> errorShow pronunciationIPABroad
111 )
112 ( ""
113 , IPA.Syllable []
114 , []
115 )
116 & ( \(i, l, acc) ->
117 ( i
118 , glueSyllableToTheRight l acc
119 & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
120 )
121 )
122 glueSyllableToTheRight ::
123 IPA.Syllable [] ->
124 [IPA.Syllable []] ->
125 [IPA.Syllable []]
126 glueSyllableToTheRight x y =
127 case y of
128 [] -> [x]
129 [yL] -> [x <> yL]
130 yL : yR -> x <> yL : yR
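-- Schematically (derivable from the cases above):
--   glueSyllableToTheRight s []       == [s]
--   glueSyllableToTheRight s (y : ys) == (s <> y) : ys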
131 instance IsList Pronunciations where
132 type Item Pronunciations = (RuleLexemes, Pronunciation)
133 toList = unPronunciations
134 fromList l = Pronunciations{unPronunciations = l & fromList}
135
136 {-
137 instance IsString Pronunciations where
138 fromString = \case
139 "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
140 s -> Pronunciations (s & Text.pack) $
141 fromString s
142 & IPA.parseSyllables @[]
143 & either errorShow id
144 -}
145 data ExampleLiteral = ExampleLiteral
146 { exampleLiteralText :: ShortText
147 , exampleLiteralTags :: Set LiteralTag
148 , exampleLiteralMeaning :: ShortText
149 }
150 deriving (Eq, Ord, Show)
151 instance IsString ExampleLiteral where
152 fromString s =
153 ExampleLiteral
154 { exampleLiteralText = s & fromString
155 , exampleLiteralTags = Set.empty
156 , exampleLiteralMeaning = ""
157 }
158 data LiteralTag
159 = LiteralTagOccurence
160 | LiteralTagMeta
161 | LiteralTagSilent
162 deriving (Eq, Ord, Show)
163 exampleLiteralsText :: [ExampleLiteral] -> ShortText
164 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
165
166 hyphen =
167 ExampleLiteral
168 { exampleLiteralText = "-"
169 , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
170 , exampleLiteralMeaning = ""
171 }
172 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
173 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
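-- Illustrative use of the helpers above: silent (occurence "e") tags the
-- literal "e" both as the occurrence the rule is about and as a silent
-- letter.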
174
175 type SyllableText = ShortText
176 type SyllableBroad = IPA.Syllable []
177 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
178
179 data Adjacent
180 = AdjacentBorder
181 | AdjacentVowel
182 | AdjacentConsonant
183 deriving (Eq, Ord, Show)
184 data Variant
185 = VariantDefinition Text
186 | VariantStress
187 deriving (Eq, Ord, Show)
188 data Rule = Rule
189 { -- , ruleStress :: Bool
190 -- , ruleDefinition :: Maybe Text
191
192 -- [ "e" := ["ɛ"] , "x" := ["g","z"] , "er" := ["ɛʁ"] , "cice" := ["sis"] ]
193 -- [ "exercice" := ["ɛg.zɛʁ.sis"]
194 -- ]
195 rulePron :: Pronunciations
196 , ruleExamples :: Map InputLexemes Pronunciation
197 }
198 deriving (Eq, Ord, Show)
199
200 rule =
201 Rule
202 { rulePron = Pronunciations{unPronunciations = []}
203 , ruleExamples = mempty
204 }
205 space = rule
206 word
207 , begining
208 , ending ::
209 RuleLexemes -> RuleLexemes
210 word = begining >>> ending
211 begining = after [LexemeBorder]
212 ending = before [LexemeBorder]
213 before ls r = RuleLexemes (unRuleLexemes r <> ls)
214 after ls r = RuleLexemes (ls <> unRuleLexemes r)
215 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
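-- A sketch of how these combinators compose (the pattern string is an
-- example only): word "et" anchors the pattern at both word borders,
-- i.e. roughly RuleLexemes [LexemeBorder, LexemeChar 'e', LexemeChar 't',
-- LexemeBorder], while begining "et" anchors it only at the start.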
216
217 type Table = Map RuleLexemes Rule
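-- | Merge the examples of every rule, failing loudly (via errorShow) if
-- the same input was given two different pronunciations.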
218 examples :: Table -> Map InputLexemes Pronunciation
219 examples tbl =
220 [ v & ruleExamples
221 | v <- tbl & Map.elems
222 ]
223 & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
224
225 data Pron = Pron
226 { pronInput :: [Lexeme]
227 , pronRule :: Rule
228 }
229 deriving (Eq, Show)
230
231 data Syl = Syl
232 { sylText :: Text
233 , sylDependsOnBefore :: Bool
234 , sylDependsOnAfter :: Bool
235 , sylDependsOnMeaning :: Bool
236 , sylSound :: Text -- [IPA.Syllable []]
237 , sylIndex :: Int
238 , sylSilent :: Bool
239 , sylSplit :: Bool
240 }
241
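-- | Flatten parsed words into display syllables: plain characters become
-- silent Syls, each rule pronunciation becomes a Syl that is silent, a
-- single sounded syllable, or split over two syllables, and sylIndex
-- advances only on sounded syllables, continuing across words.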
242 addIndexes :: [[Either Char Pron]] -> [[Syl]]
243 addIndexes = go 0
244 where
245 go _idx [] = []
246 go idx (prons : next) = List.reverse prons' : go idx' next
247 where
248 (idx', prons') =
249 prons
250 & List.foldl'
251 ( \(i, is) -> \case
252 Left c ->
253 ( i
254 , Syl
255 { sylText = Text.singleton c
256 , sylDependsOnAfter = False
257 , sylDependsOnBefore = False
258 , sylDependsOnMeaning = False
259 , sylSound = []
260 , sylIndex = i
261 , sylSilent = True
262 , sylSplit = False
263 }
264 : is
265 )
266 Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
267 ps
268 & List.foldl'
269 ( \(j, js) (t, Pronunciation{..}) ->
270 let sylText = t & unRuleLexemes & lexemesChars & Text.pack
271 in case pronunciationIPABroad of
272 []
273 | not (Text.null pronunciationText) ->
274 ( j + 1
275 , Syl
276 { sylText
277 , sylSound = pronunciationText
278 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
279 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
280 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
281 , sylIndex = j + 1
282 , sylSilent = False
283 , sylSplit = False
284 }
285 : js
286 )
287 [IPA.Syllable []]
288 | Text.null pronunciationText ->
289 ( j
290 , case js of
291 [] ->
292 Syl
293 { sylText
294 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
295 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
296 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
297 , sylSound = ""
298 , sylIndex = j
299 , sylSilent = True
300 , sylSplit = False
301 }
302 : js
303 j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
304 )
305 syls
306 | (syls & all IPA.isSilent) && Text.null pronunciationText ->
307 ( j
308 , Syl
309 { sylText
310 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
311 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
312 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
313 , sylSound = ""
314 , sylIndex = j
315 , sylSilent = True
316 , sylSplit = False
317 }
318 : js
319 )
320 _syls@[_] ->
321 ( j + 1
322 , Syl
323 { sylText
324 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
325 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
326 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
327 , sylSound = pronunciationText
328 , sylIndex = j + 1
329 , sylSilent = False
330 , sylSplit = False
331 }
332 : js
333 )
334 _syls@[_, _] ->
335 ( j
336 , Syl
337 { sylText
338 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
339 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
340 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
341 , sylSound = pronunciationText
342 , sylIndex = j
343 , sylSilent = False
344 , sylSplit = True
345 }
346 : js
347 )
348 syls -> errorShow syls
349 )
350 (i, is)
351 where
352 sylDependsOnAfter =
353 List.reverse >>> \case
354 LexemeBorder : _ -> True
355 LexemeSilent : _ -> True
356 LexemeConsonant : _ -> True
357 LexemeDoubleConsonant : _ -> True
358 LexemeVowel : _ -> True
359 LexemeSemiVowel : _ -> True
360 _ -> False
361 sylDependsOnBefore =
362 \case
363 LexemeBorder : _ -> True
364 LexemeSilent : _ -> True
365 LexemeConsonant : _ -> True
366 LexemeDoubleConsonant : _ -> True
367 LexemeVowel : _ -> True
368 LexemeSemiVowel : _ -> True
369 _ -> False
370 sylDependsOnMeaning =
371 List.reverse >>> \case
372 LexemeMeaning{} : _ -> True
373 _ -> False
374 )
375 (idx, [])
376
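-- | For every rule, also emit a capitalised variant: the first LexemeChar
-- of the pattern (and of its pronunciation and example keys) is
-- upper-cased, presumably so that rules still match at the start of a
-- sentence.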
377 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
378 withCapital =
379 foldMap \(RuleLexemes pat, rul) ->
380 [ (RuleLexemes pat, rul)
381 ,
382 ( RuleLexemes (withCapitalLexemes pat)
383 , rul
384 { rulePron = rul & rulePron & withCapitalPronunciations
385 , ruleExamples =
386 rul
387 & ruleExamples
388 & Map.mapKeys
389 ( unInputLexemes
390 >>> withCapitalLexemes
391 >>> InputLexemes
392 )
393 }
394 )
395 ]
396 where
397 withCapitalPronunciations (Pronunciations []) = Pronunciations []
398 withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
399 Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
400 withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
401 withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
402 withCapitalLexemes [] = []
403
404 lexemesChars :: [Lexeme] -> [Char]
405 lexemesChars p =
406 p & foldMap \case
407 LexemeChar c -> [c]
408 _ -> []
409
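-- | Lex then parse some text against a rule table.  Intended usage
-- (sketch, where frenchRules is a hypothetical Table):
--   run frenchRules "exercice"
-- yields either a lexer error (Left Left), a parser error (Left Right),
-- or the recognised mix of bare characters and rule applications.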
410 run ::
411 Table ->
412 Text ->
413 Either
414 ( Either
415 (P.ParseErrorBundle Text ())
416 (P.ParseErrorBundle [Lexeme] ())
417 )
418 [Either Char Pron]
419 run rules inp =
420 inp
421 & runLexer
422 & either (Left . Left) \lexs ->
423 lexs
424 & runParser rules
425 & either (Left . Right) Right
426
427 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
428 runParser tbl inp = inp & P.runParser (parser tbl) "input"
429
430 parseLiterals ::
431 Table ->
432 [ExampleLiteral] ->
433 Either
434 ( Either
435 (P.ParseErrorBundle Text ())
436 (P.ParseErrorBundle [Lexeme] ())
437 )
438 [Either Char Pron]
439 parseLiterals rules inp =
440 inp
441 & traverse
442 ( \ExampleLiteral{..} ->
443 exampleLiteralText
444 & TextShort.toText
445 & runLexer
446 <&> ( <>
447 [ LexemeMeaning exampleLiteralMeaning
448 | exampleLiteralMeaning & TextShort.null & not
449 ]
450 )
451 )
452 & either (Left . Left) \lexs ->
453 lexs
454 & mconcat
455 & runParser rules
456 & either (Left . Right) Right
457
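-- | Parse a lexeme stream against the table, trying rules in descending
-- key order (see the comment below).  A pattern may end in context
-- lexemes (Lexeme{Vowel,SemiVowel,Consonant,Silent}); these are checked
-- against a lookahead that parseAhead reconstructs from the sound of
-- whatever follows, and the sound of the current rule's own last
-- syllable is pushed back onto the input so the next rule can match it
-- as its preceding context.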
458 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
459 parser tbl = do
460 res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
461 P.eof
462 return $ res & catMaybes
463 where
464 -- Match one of the rules, trying the longest first
465 parseRules :: P.Parsec () [Lexeme] Pron
466 parseRules =
467 P.choice
468 [ parseRule r
469 | r <- tbl & Map.toDescList
470 ]
471 {-
472 <|> P.choice
473 [ parseRule
474 ( RuleLexemes $
475 rulePat & unRuleLexemes <&> \case
476 LexemeChar c -> LexemeChar (c & Char.toUpper)
477 x -> x
478 , curRule
479 )
480 | (rulePat, curRule) <- tbl & Map.toDescList
481 ]
482 -}
483 parseRule (rulePat, curRule@Rule{..}) =
484 P.try do
485 let
486 pat = rulePat & unRuleLexemes
487 patSep = (`List.elem` list [LexemeVowel, LexemeSemiVowel, LexemeConsonant, LexemeSilent])
488 -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
489 patBegin = pat & List.dropWhileEnd patSep
490 patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
491 -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
492 P.chunk patBegin
493 inpAfterBegin <- P.getInput
494 unless (List.null patEnd) do
495 inpWithAhead <- parseAhead
496 -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
497 P.setInput inpWithAhead
498 P.chunk patEnd & void
499 -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
500 let lastSound =
501 rulePron
502 & unPronunciations
503 & List.reverse
504 & headMaybe
505 & maybe
506 []
507 ( snd
508 >>> pronunciationIPABroad
509 >>> List.reverse
510 >>> headMaybe
511 >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
512 )
513 P.setInput $ lastSound <> inpAfterBegin
514 return Pron{pronInput = pat, pronRule = curRule}
515 parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
516 parseChar =
517 P.anySingle <&> \case
518 LexemeChar c -> Just $ Left c
519 _ -> Nothing
520 parseAhead :: P.Parsec () [Lexeme] [Lexeme]
521 parseAhead = do
522 nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
523 -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
524 case nextStep of
525 Right (Right Pron{pronInput, pronRule}) -> do
526 let x =
527 pronRule
528 & rulePron
529 & unPronunciations
530 & headMaybe
531 & maybe
532 []
533 ( snd
534 >>> pronunciationIPABroad
535 >>> headMaybe
536 >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
537 )
538 inp <- P.getInput
539 return $ x <> pronInput <> inp
540 Right (Left lex) -> do
541 parseAhead <&> (lex :)
542 Left{} -> P.getInput
543 lexemeHeadSound :: [_] -> [Lexeme]
544 lexemeHeadSound =
545 headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
546 Just IPA.Zero{} -> [LexemeSilent]
547 Just IPA.Vowel{} -> [LexemeVowel]
548 Just (IPA.Consonant consonant) -> do
549 case consonant of
550 IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
551 IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
552 _ -> [LexemeConsonant]
553 _ -> [] -- error
554
555 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
556 runLexer inp = inp & P.runParser lexer "input"
557
558 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
559 exampleLiteralsLexemes ls =
560 ls & foldMap \ExampleLiteral{..} ->
561 unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
562 <> [ LexemeMeaning exampleLiteralMeaning
563 ]
564
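-- | Split raw text into Lexemes: a run of letters (optionally followed by
-- a "{meaning}" annotation) or of digits is wrapped in LexemeBorder
-- markers, while spaces, symbols, separators, marks and punctuation
-- become bare LexemeChars.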
565 lexer :: P.Parsec () Text [Lexeme]
566 lexer = do
567 lls <- P.many do
568 P.choice $
569 list
570 [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
571 return [LexemeChar c | c <- cs & Text.unpack]
572 , do
573 cs <- P.takeWhile1P Nothing Char.isLetter
574 mean <- (<|> return []) $ P.try do
575 P.single '{'
576 m <- P.takeWhile1P Nothing (/= '}')
577 P.single '}'
578 return [LexemeMeaning (TextShort.fromText m)]
579 return $
580 LexemeBorder
581 : [LexemeChar c | c <- cs & Text.unpack]
582 <> mean
583 <> [LexemeBorder]
584 , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
585 return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
586 , P.satisfy Char.isSymbol >>= \c ->
587 return [LexemeChar c]
588 , P.satisfy Char.isSeparator >>= \c ->
589 return [LexemeChar c]
590 , P.satisfy Char.isMark >>= \c ->
591 return [LexemeChar c]
592 , P.satisfy Char.isPunctuation >>= \c ->
593 return [LexemeChar c]
594 ]
595 P.eof
596 return $ mconcat lls
597
598 words :: [Either Char Pron] -> [[Either Char Pron]]
599 words [] = []
600 words prons = word0 : words next
601 where
602 (word0, rest) = prons & List.span (isSep >>> not)
603 (_sep, next) = rest & List.span isSep
604 isSep = \case
605 Left c | c & Char.isSpace -> True
606 _ -> False
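-- Schematically (pronA and pronB stand for arbitrary parsed rules):
--   words [Right pronA, Left ' ', Left ' ', Right pronB]
--     == [[Right pronA], [Right pronB]]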
607
608 data Input = Input
609 { inputText :: Text
610 , inputPhonetic :: [IPA.Syllable []]
611 , inputMeaning :: Maybe ShortText
612 }
613
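-- | A small hand-written pattern trie (entries are illustrative): each
-- level keys on the next context (a character, a lexical category or a
-- border), and a branch ends with the pronunciations to emit, e.g. "a"
-- before a word border maps to "ə", "the" before a space to "zi".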
614 patterns :: Map PatKey PatNode
615 patterns =
616 [ PatKeyNext (PatContextChar 'a') :=
617 [ PatKeyNext PatContextLexicalBorder :=
618 PatEnd ["a" := "ə"]
619 ]
620 , PatKeyNext (PatContextChar 't') :=
621 [ PatKeyNext (PatContextChar 'h') :=
622 [ PatKeyNext (PatContextChar 'e') :=
623 [ PatKeyNext (PatContextLexicalCategory Char.Space) :=
624 PatEnd ["the" := "zi"]
625 ]
626 ]
627 ]
628 ]
629
630 data State = State
631 { stateInput :: LZ.Zipper Inp
632 , stateBuffer :: [PatKey]
633 , statePats :: Map PatKey PatNode
634 , statePatReset :: Bool
635 }
636
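-- | Drive the pattern trie over the input, one Inp cell per character:
-- loop tries every pattern at the current position and resets the
-- pattern set when none matches, while match either descends into a
-- PatNode or, on PatEnd, inserts the matched keys and pronunciations
-- into the zipper.  Only the word-border case of look is handled so far.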
637 parse :: Map PatKey PatNode -> Text -> [Inp]
638 parse initPats input =
639 loop
640 State
641 { stateInput = input & Text.unpack & fmap charToInp & LZ.fromList
642 , stateBuffer = []
643 , statePats = initPats
644 , statePatReset = True
645 }
646 & stateInput
647 & LZ.toList
648 where
649 charToInp :: Char -> Inp
650 charToInp c =
651 Inp
652 { inpPats = [PatKeyNext (PatContextChar c)]
653 , inpPronunciations = []
654 }
655 loop :: State -> State
656 loop st =
657 [ look (key, val) st
658 | (key, val) <- st & statePats & Map.toList
659 ]
660 & catMaybes
661 & headMaybe
662 & fromMaybe (loop st{statePats = initPats})
663 look :: (PatKey, PatNode) -> State -> Maybe State
664 look kv@(key, val) st =
665 case key of
666 PatKeyPrev patPrev -> errorShow ("prev" :: Text)
667 PatKeyNext patNext ->
668 case patNext of
669 PatContextLexicalBorder
670 | stateInput st & LZ.endp -> match kv st
671 -- PatContextChar c
672 -- | Just inpNext <- stateInput st & LZ.safeCursor ->
673 -- inpNext & inpPats &
674 -- case of
675 -- Nothing ->
676 _ -> Nothing
677 match kv@(key, val) st =
678 case val of
679 PatEnd pron ->
680 Just
681 st
682 { statePats = initPats
683 , stateBuffer = []
684 , stateInput =
685 stateInput st
686 & LZ.insert
687 Inp
688 { inpPats = stateBuffer st & (key :) & List.reverse
689 , inpPronunciations = pron
690 }
691 }
692 PatNode pats ->
693 Just
694 st
695 { statePats = pats
696 , stateBuffer = key : stateBuffer st
697 }
698
699 {-
700 case statePats st & Map.lookup k of
701 Nothing -> st
702 Just (PatNode pats) ->
703 loop st {statePats = pats, stateBuffer = k : stateBuffer st}
704 Just (PatEnd end) ->
705 loop st { statePats = initPats
706 , stateBuffer = []
707 , stateInput = stateInput st
708 & LZ.insert Inp
709 { inpPats = stateBuffer st & List.reverse
710 , inpPronunciations = end
711 }
712 & LZ.right
713 }
714 -}
715
716 {-
717 parse :: PatNode -> Text -> [Inp]
718 parse initPats input =
719 let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
720 runInp [] initPats inpZip & LZ.toList
721 where
722 charToInp :: Char -> Inp
723 charToInp c = Inp
724 { inpPats = [PatKeyNext (PatContextChar c)]
725 , inpPronunciations = []
726 }
727 runInp :: [PatKey] -> PatNode -> LZ.Zipper Inp -> LZ.Zipper Inp
728 runInp oks pat inp =
729 traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
730 case pat of
731 PatEnd end ->
732 -- the pattern ends
733 inp
734 & LZ.insert Inp
735 { inpPats = oks & List.reverse
736 , inpPronunciations = end
737 }
738 & LZ.right
739 & runInp [] initPats
740 PatNode pats ->
741 -- the pattern may go on
742 case inp & LZ.safeCursor of
743 Nothing ->
744 inp
745 & runPat [] oks [PatKeyNext PatContextLexicalBorder] pats
746 Just cur ->
747 inp & LZ.delete
748 & runPat [] oks (inpPats cur & List.sort) pats
749
750 runPat :: [PatKey] -> [PatKey] -> [PatKey] -> Map PatKey PatNode -> LZ.Zipper Inp -> LZ.Zipper Inp
751 runPat kos oks todos pats inp =
752 traceShow ( "runPat"::Text
753 , ("kos"::Text) := kos
754 , ("oks"::Text) := oks
755 , ("todos"::Text) := todos
756 , ("cur"::Text) :=LZ.safeCursor inp
757 ) $
758 case todos of
759 [] | LZ.endp inp -> inp
760 & runInp kos (PatEnd [])
761 & runInp oks (PatEnd [])
762 [] ->
763 -- nothing left to advance the pattern
764 --traceShow ("runPat/[]"::Text) $
765 inp
766 & (if null kos then id else runInp kos (PatEnd []))
767 & runInp oks (PatNode pats)
768 k:ks ->
769 case pats & Map.lookup k of
770 -- the pattern ends
771 Just (PatEnd end) ->
772 --traceShow ("runPat/End"::Text) $
773 -- ks forgotten
774 inp
775 & (if null kos then id else runInp kos (PatEnd []))
776 & runInp (k:oks) (PatEnd end)
777 -- the pattern advances
778 Just (PatNode nextPats) ->
779 --traceShow ("runPat/Node"::Text) $
780 inp & runPat kos (k:oks) ks nextPats
781 -- the pattern does not advance
782 Nothing ->
783 inp & runPat (k:kos) oks ks pats
784 -}
785
786 data PatContext
787 = PatContextChar Char
788 | PatContextLexicalCategory Char.GeneralCategory
789 | PatContextLexicalBorder
790 | PatContextPhoneticVowel
791 | PatContextPhoneticSemiVowel
792 | PatContextPhoneticConsonant
793 deriving (Eq, Ord, Show)
794
795 data Inp = Inp
796 { inpPats :: [PatKey]
797 , inpPronunciations :: Pronunciations
798 }
799 deriving (Show)
800
801 data PatKey
802 = PatKeyPrev PatContext
803 | PatKeyNext PatContext
804 deriving (Eq, Ord, Show)
805 data PatNode
806 = PatNode (Map PatKey PatNode)
807 | PatEnd Pronunciations
808 deriving (Show)
809 instance IsList PatNode where
810 type Item PatNode = (PatKey, PatNode)
811 fromList = PatNode . Map.fromListWith (errorShow)
812 toList = errorShow
813
814 data Lexeme
815 = LexemeBorder
816 | LexemeVowel
817 | LexemeSemiVowel
818 | LexemeConsonant
819 | LexemeDoubleConsonant
820 | LexemeSilent
821 | LexemeMeaning ShortText
822 | -- | `LexemeChar` is last so that it takes priority when using `Map.toDescList`
823 LexemeChar Char
824 deriving (Eq, Ord, Show)
825
826 -- data Sound
827 -- = SoundVowel
828 -- | SoundConsonant
829 -- deriving (Eq, Ord, Show)
830
831 {-
832 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
833 deriving (Eq, Ord, Show)
834 instance P.Stream Lexemes where
835 type Token Lexemes = Lexeme
836 type Tokens Lexemes = Lexemes
837 tokensToChunk _px = Lexemes
838 chunkToTokens _px = unLexemes
839 chunkLength _px = unLexemes >>> List.length
840 chunkEmpty _px = unLexemes >>> List.null
841 take1_ = unLexemes >>> P.take1_ >>> coerce
842 takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
843 takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
844
845 instance IsString Lexemes where
846 fromString s =
847 s
848 & Text.pack
849 & runLexer
850 & either
851 errorShow
852 ((`appEndo` []) >>> Lexemes)
853 -}
854
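-- | Lexemes forming a rule pattern.  Unlike 'InputLexemes' below, the
-- IsString instance strips the LexemeBorder markers that the lexer puts
-- around a run of letters, so a bare pattern string does not implicitly
-- anchor to word borders (use word, begining or ending for that).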
855 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
856 deriving (Eq, Ord, Show)
857 instance HasTypeDefault RuleLexemes where
858 typeDefault = RuleLexemes typeDefault
859 instance Semigroup RuleLexemes where
860 RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
861 instance Monoid RuleLexemes where
862 mempty = RuleLexemes mempty
863 instance IsList RuleLexemes where
864 type Item RuleLexemes = Lexeme
865 fromList = RuleLexemes
866 toList = unRuleLexemes
867 instance IsString RuleLexemes where
868 fromString s =
869 s
870 & Text.pack
871 & runLexer
872 & either
873 errorShow
874 ( List.dropWhileEnd (== LexemeBorder)
875 >>> List.dropWhile (== LexemeBorder)
876 >>> RuleLexemes
877 )
878
879 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
880 deriving (Eq, Ord, Show)
881 instance HasTypeDefault InputLexemes where
882 typeDefault = InputLexemes typeDefault
883 instance Semigroup InputLexemes where
884 InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
885 instance Monoid InputLexemes where
886 mempty = InputLexemes mempty
887 instance IsList InputLexemes where
888 type Item InputLexemes = Lexeme
889 fromList = InputLexemes
890 toList = unInputLexemes
891 instance IsString InputLexemes where
892 fromString s =
893 s
894 & Text.pack
895 & runLexer
896 & either errorShow InputLexemes
897
898 instance P.ShowErrorComponent () where
899 showErrorComponent = show
900 errorComponentLen _ = 2
901 instance P.VisualStream [Lexeme] where
902 showTokens _s = show
903 tokensLength _s xs = xs <&> (show >>> List.length) & sum
904 instance P.TraversableStream [Lexeme] where
905 reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
906
907 data LexemeTag
908 = LexemeTagLetter
909 | LexemeTagSpace
910 | LexemeTagPunctuation
911 | LexemeTagSeparator
912 | LexemeTagMark
913 | LexemeTagSymbol
914 | LexemeTagDefinition
915 | LexemeTagBorder
916 deriving (Eq, Ord, Show)
917 deriving instance Ord (IPA.Syllable [])
918 deriving instance Ord IPA.SuprasegmentalFeature
919 deriving instance Ord IPA.SegmentalFeature
920 deriving instance Ord IPA.Sibilance
921 deriving instance Ord IPA.Manner
922 deriving instance Ord IPA.Phonation
923 deriving instance Ord IPA.Roundedness
924 deriving instance Ord IPA.Height
925 deriving instance Ord IPA.Vowel
926 deriving instance Ord IPA.Consonant
927 deriving instance Ord IPA.Segment
928
929 {-
930 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
931 tableToMatch tbl = loop
932 where
933 loop prevBorder = \case
934 InputText inp ->
935 [ (matchingLength, )
936 | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
937 , let matchingLength = transMatchingLength input trans
938 , 0 < matchingLength || not (isTransConsume trans)
939 , let (inputRead, inputRest) = input & Text.splitAt matchingLength
940 ]
941 & Map.fromListWith (\new old -> old)
942 & Map.lookupMax
943 <&> snd
944
945 -}
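-- | Render the rule table as printable HTML: rules are laid out on
-- sheets of 50 entries, each entry showing the pattern characters, its
-- broad IPA pronunciation (omitted when entirely silent) and its example
-- pronunciations.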
946 tableHtml :: Table -> IO HTML.Html
947 tableHtml tbl = do
948 dataPath <- Self.getDataDir <&> File.normalise
949 let title :: String = "LexerDict"
950 let pageOrientation = Paper.PageOrientationPortrait
951 let pageSize = Paper.PageSizeA4
952 let partLangue = LangueFrançais
953 return do
954 HTML.docTypeHtml do
955 HTML.head do
956 HTML.title $ title & HTML.toHtml
957 forM_
958 ( [ "styles/Paper.css"
959 , "styles/French/Lexer.css"
960 , "styles/Rosetta/Reading.css"
961 ]
962 & list
963 )
964 \cssFile ->
965 HTML.link
966 ! HA.rel "stylesheet"
967 ! HA.type_ "text/css"
968 ! HA.href (dataPath </> cssFile & HTML.toValue)
969 HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
970 -- HTML.styleCSS $ pageDifficulties & difficultyCSS
971 HTML.body
972 ! classes ["A4", "french-lexer"]
973 $ do
974 "\n"
975 let rulesChunks = tbl & Map.toList & chunksOf 50
976 forM_ rulesChunks \rules ->
977 HTML.section
978 ! classes
979 [ "sheet"
980 ]
981 ! styles
982 []
983 $ do
984 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
985 "\n"
986 HTML.div
987 ! classes
988 [ "dict-entry"
989 , if even ruleIndex then "even" else "odd"
990 ]
991 ! styles
992 []
993 $ do
994 "\n"
995 HTML.div
996 ! classes
997 [ "dict-key"
998 , "lang-" <> className partLangue
999 ]
1000 ! styles
1001 []
1002 -- "grid-template-columns" :=
1003 -- (0.5 & cm & HTML.toCSS)
1004 -- & List.replicate lexerDictMaxKeyLength
1005 -- & List.unwords
1006
1007 $ do
1008 forM_ (["model"] :: [String]) \rowKind -> do
1009 forM_ (rulePat & unRuleLexemes) \ruleChar -> do
1010 -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
1011 case ruleChar of
1012 LexemeChar c -> do
1013 HTML.span
1014 ! classes
1015 [ "dict-key-cell"
1016 , rowKind
1017 -- , "script-" <> className uniScript
1018 ]
1019 $ do
1020 c & HTML.toHtml
1021 _ -> ""
1022 HTML.div
1023 ! classes
1024 [ "dict-pronunciation"
1025 ]
1026 $ do
1027 -- HTML.span ! classes ["arrow"] $ "→"
1028 case rulePron of
1029 Pronunciations
1030 { unPronunciations =
1031 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
1032 } -> ""
1033 Pronunciations{unPronunciations = is} ->
1034 is
1035 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
1036 & HTML.toHtml
1037 HTML.div
1038 ! classes
1039 [ "dict-lexeme"
1040 ]
1041 $ do
1042 -- HTML.span ! classes ["arrow"] $ "→"
1043 forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
1044 HTML.span
1045 ! classes
1046 []
1047 $ do
1048 case pronunciationIPABroad of
1049 [] -> pronunciationText & HTML.toHtml
1050 _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
1051 "; "