]> Git — Sourcephile - julm/worksheets.git/blob - src/Language/Pronunciation.hs
update
[julm/worksheets.git] / src / Language / Pronunciation.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5
6 module Language.Pronunciation where
7
8 import Control.Applicative (Alternative (..))
9 import Control.Monad.Combinators qualified as P
10 import Control.Monad.Trans.Class qualified as MT
11 import Control.Monad.Trans.State qualified as MT
12 import Data.List qualified as List
13 import Data.List.Zipper qualified as LZ
14 import Data.Map.Strict qualified as Map
15 import Data.Set qualified as Set
16 import Data.Text qualified as Text
17 import Data.Text.Short qualified as TextShort
18 import Data.Traversable (traverse)
19 import Language
20 import Paths_worksheets qualified as Self
21 import System.FilePath.Posix ((</>))
22 import System.FilePath.Posix qualified as File
23 import Text.Blaze.Html5.Attributes qualified as HA
24 import Text.Megaparsec qualified as P
25 import Worksheets.Utils.Char qualified as Char
26 import Worksheets.Utils.HTML (className, classes, styles, (!))
27 import Worksheets.Utils.HTML qualified as HTML
28 import Worksheets.Utils.IPA qualified as IPA
29 import Worksheets.Utils.Paper qualified as Paper
30 import Worksheets.Utils.Prelude
31
32 data Pronunciation = Pronunciation
33 { pronunciationIPABroad :: [IPA.Syllable []]
34 , pronunciationText :: Text
35 }
36 deriving (Eq, Ord, Show)
37 instance Semigroup Pronunciation where
38 x <> y =
39 Pronunciation
40 { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
41 , pronunciationText =
42 [pronunciationText x, pronunciationText y]
43 & List.filter (/= "")
44 & Text.intercalate "."
45 }
46 instance IsList Pronunciation where
47 type Item Pronunciation = IPA.Syllable []
48 toList = pronunciationIPABroad
49
50 -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
51 fromList l =
52 Pronunciation
53 { pronunciationIPABroad = ipa
54 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
55 }
56 where
57 ipa =
58 l
59 & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
60 & fromList
61 instance IsString Pronunciation where
62 fromString s =
63 Pronunciation
64 { pronunciationIPABroad = ipa
65 , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
66 }
67 where
68 ipa =
69 s
70 & Text.pack
71 & IPA.parseSyllables @[]
72 & either errorShow id
73
74 {-
75 data PronunciationKey = PronunciationKey
76 { pronunciationKeyText :: Text
77 , pronunciationKeyPron :: Pronunciation
78 , pronunciationKey
79 }
80 -}
81
82 newtype Pronunciations = Pronunciations
83 { unPronunciations :: [(RuleLexemes, Pronunciation)]
84 }
85 deriving (Eq, Ord)
86 deriving newtype (Show)
87 deriving newtype (Semigroup)
88 deriving newtype (Monoid)
89 joinPronunciations :: Pronunciations -> Pronunciations
90 joinPronunciations (Pronunciations ps) =
91 Pronunciations
92 [ input :=
93 Pronunciation
94 { pronunciationIPABroad = ipa
95 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
96 }
97 ]
98 where
99 input :: RuleLexemes
100 ipa :: [IPA.Syllable []]
101 (input, ipa) =
102 ps
103 & List.foldr
104 ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
105 case pronunciationIPABroad of
106 [] -> (inp <> suffix, IPA.Syllable [], acc)
107 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
108 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
109 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
110 _ -> errorShow pronunciationIPABroad
111 )
112 ( ""
113 , IPA.Syllable []
114 , []
115 )
116 & ( \(i, l, acc) ->
117 ( i
118 , glueSyllableToTheRight l acc
119 & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
120 )
121 )
122 glueSyllableToTheRight ::
123 IPA.Syllable [] ->
124 [IPA.Syllable []] ->
125 [IPA.Syllable []]
126 glueSyllableToTheRight x y =
127 case y of
128 [] -> [x]
129 [yL] -> [x <> yL]
130 yL : yR -> x <> yL : yR
131 instance IsList Pronunciations where
132 type Item Pronunciations = (RuleLexemes, Pronunciation)
133 toList = unPronunciations
134 fromList l = Pronunciations{unPronunciations = l & fromList}
135
136 {-
137 instance IsString Pronunciations where
138 fromString = \case
139 "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
140 s -> Pronunciations (s & Text.pack) $
141 fromString s
142 & IPA.parseSyllables @[]
143 & either errorShow id
144 -}
145 data ExampleLiteral = ExampleLiteral
146 { exampleLiteralText :: ShortText
147 , exampleLiteralTags :: Set LiteralTag
148 , exampleLiteralMeaning :: ShortText
149 }
150 deriving (Eq, Ord, Show)
151 instance IsString ExampleLiteral where
152 fromString s =
153 ExampleLiteral
154 { exampleLiteralText = s & fromString
155 , exampleLiteralTags = Set.empty
156 , exampleLiteralMeaning = ""
157 }
158 data LiteralTag
159 = LiteralTagOccurence
160 | LiteralTagMeta
161 | LiteralTagSilent
162 deriving (Eq, Ord, Show)
163 exampleLiteralsText :: [ExampleLiteral] -> ShortText
164 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
165
166 hyphen =
167 ExampleLiteral
168 { exampleLiteralText = "-"
169 , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
170 , exampleLiteralMeaning = ""
171 }
172 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
173 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
174
175 type SyllableText = ShortText
176 type SyllableBroad = IPA.Syllable []
177 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
178
179 data Adjacent
180 = AdjacentBorder
181 | AdjacentVowel
182 | AdjacentConsonant
183 deriving (Eq, Ord, Show)
184 data Variant
185 = VariantDefinition Text
186 | VariantStress
187 deriving (Eq, Ord, Show)
188 data Rule = Rule
189 { -- , ruleStress :: Bool
190 -- , ruleDefinition :: Maybe Text
191
192 -- [ "e" := ["ɛ"] , "x" := ["g","z"] , "er" := ["ɛʁ"] , "cice" := ["sis"] ]
193 -- [ "exercice" := ["ɛg.zɛʁ.sis"]
194 -- ]
195 rulePron :: Pronunciations
196 , ruleExamples :: Map InputLexemes Pronunciation
197 }
198 deriving (Eq, Ord, Show)
199
200 rule =
201 Rule
202 { rulePron = Pronunciations{unPronunciations = []}
203 , ruleExamples = mempty
204 }
205 space = rule
206 word
207 , begining
208 , ending ::
209 RuleLexemes -> RuleLexemes
210 word = begining >>> ending
211 begining = after [LexemeBorder]
212 ending = before [LexemeBorder]
213 before ls r = RuleLexemes (unRuleLexemes r <> ls)
214 after ls r = RuleLexemes (ls <> unRuleLexemes r)
215 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
216
217 type Table = Map RuleLexemes Rule
218 examples :: Table -> Map InputLexemes Pronunciation
219 examples tbl =
220 [ v & ruleExamples
221 | v <- tbl & Map.elems
222 ]
223 & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
224
225 data Pron = Pron
226 { pronInput :: [Lexeme]
227 , pronRule :: Rule
228 }
229 deriving (Eq, Show)
230
231 data Syl = Syl
232 { sylText :: Text
233 , sylDependsOnBefore :: Bool
234 , sylDependsOnAfter :: Bool
235 , sylDependsOnMeaning :: Bool
236 , sylSound :: Text -- [IPA.Syllable []]
237 , sylIndex :: Int
238 , sylSilent :: Bool
239 , sylSplit :: Bool
240 }
241
242 addIndexes :: [[Either Char Pron]] -> [[Syl]]
243 addIndexes = go 0
244 where
245 go _idx [] = []
246 go idx (prons : next) = List.reverse prons' : go idx' next
247 where
248 (idx', prons') =
249 prons
250 & List.foldl'
251 ( \(i, is) -> \case
252 Left c ->
253 ( i
254 , Syl
255 { sylText = Text.singleton c
256 , sylDependsOnAfter = False
257 , sylDependsOnBefore = False
258 , sylDependsOnMeaning = False
259 , sylSound = []
260 , sylIndex = i
261 , sylSilent = True
262 , sylSplit = False
263 }
264 : is
265 )
266 Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
267 ps
268 & List.foldl'
269 ( \(j, js) (t, Pronunciation{..}) ->
270 let sylText = t & unRuleLexemes & lexemesChars & Text.pack
271 in case pronunciationIPABroad of
272 []
273 | not (Text.null pronunciationText) ->
274 ( j + 1
275 , Syl
276 { sylText
277 , sylSound = pronunciationText
278 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
279 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
280 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
281 , sylIndex = j + 1
282 , sylSilent = False
283 , sylSplit = False
284 }
285 : js
286 )
287 [IPA.Syllable []]
288 | Text.null pronunciationText ->
289 ( j
290 , case js of
291 [] ->
292 Syl
293 { sylText
294 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
295 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
296 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
297 , sylSound = ""
298 , sylIndex = j
299 , sylSilent = True
300 , sylSplit = False
301 }
302 : js
303 j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
304 )
305 syls
306 | (syls & all IPA.isSilent) && Text.null pronunciationText ->
307 ( j
308 , Syl
309 { sylText
310 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
311 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
312 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
313 , sylSound = ""
314 , sylIndex = j
315 , sylSilent = True
316 , sylSplit = False
317 }
318 : js
319 )
320 _syls@[_] ->
321 ( j + 1
322 , Syl
323 { sylText
324 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
325 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
326 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
327 , sylSound = pronunciationText
328 , sylIndex = j + 1
329 , sylSilent = False
330 , sylSplit = False
331 }
332 : js
333 )
334 _syls@[_, _] ->
335 ( j
336 , Syl
337 { sylText
338 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
339 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
340 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
341 , sylSound = pronunciationText
342 , sylIndex = j
343 , sylSilent = False
344 , sylSplit = True
345 }
346 : js
347 )
348 syls -> errorShow syls
349 )
350 (i, is)
351 where
352 sylDependsOnAfter =
353 List.reverse >>> \case
354 LexemeBorder : _ -> True
355 LexemeSilent : _ -> True
356 LexemeConsonant : _ -> True
357 LexemeDoubleConsonant : _ -> True
358 LexemeVowel : _ -> True
359 LexemeSemiVowel : _ -> True
360 _ -> False
361 sylDependsOnBefore =
362 \case
363 LexemeBorder : _ -> True
364 LexemeSilent : _ -> True
365 LexemeConsonant : _ -> True
366 LexemeDoubleConsonant : _ -> True
367 LexemeVowel : _ -> True
368 LexemeSemiVowel : _ -> True
369 _ -> False
370 sylDependsOnMeaning =
371 List.reverse >>> \case
372 LexemeMeaning{} : _ -> True
373 _ -> False
374 )
375 (idx, [])
376
377 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
378 withCapital =
379 foldMap \(RuleLexemes pat, rul) ->
380 [ (RuleLexemes pat, rul)
381 ,
382 ( RuleLexemes (withCapitalLexemes pat)
383 , rul
384 { rulePron = rul & rulePron & withCapitalPronunciations
385 , ruleExamples =
386 rul
387 & ruleExamples
388 & Map.mapKeys
389 ( unInputLexemes
390 >>> withCapitalLexemes
391 >>> InputLexemes
392 )
393 }
394 )
395 ]
396 where
397 withCapitalPronunciations (Pronunciations []) = Pronunciations []
398 withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
399 Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
400 withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
401 withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
402 withCapitalLexemes [] = []
403
404 lexemesChars :: [Lexeme] -> [Char]
405 lexemesChars p =
406 p & foldMap \case
407 LexemeChar c -> [c]
408 _ -> []
409
410 run ::
411 Table ->
412 Text ->
413 Either
414 ( Either
415 (P.ParseErrorBundle Text ())
416 (P.ParseErrorBundle [Lexeme] ())
417 )
418 [Either Char Pron]
419 run rules inp =
420 inp
421 & runLexer
422 & either (Left . Left) \lexs ->
423 lexs
424 & runParser rules
425 & either (Left . Right) Right
426
427 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
428 runParser tbl inp = inp & P.runParser (parser tbl) "input"
429
430 parseLiterals ::
431 Table ->
432 [ExampleLiteral] ->
433 Either
434 ( Either
435 (P.ParseErrorBundle Text ())
436 (P.ParseErrorBundle [Lexeme] ())
437 )
438 [Either Char Pron]
439 parseLiterals rules inp =
440 inp
441 & traverse
442 ( \ExampleLiteral{..} ->
443 exampleLiteralText
444 & TextShort.toText
445 & runLexer
446 <&> ( <>
447 [ LexemeMeaning exampleLiteralMeaning
448 | exampleLiteralMeaning & TextShort.null & not
449 ]
450 )
451 )
452 & either (Left . Left) \lexs ->
453 lexs
454 & mconcat
455 & runParser rules
456 & either (Left . Right) Right
457
458 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
459 parser tbl = do
460 res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
461 P.eof
462 return $ res & catMaybes
463 where
464 -- Match one of the rules, trying longuest first
465 parseRules :: P.Parsec () [Lexeme] Pron
466 parseRules =
467 P.choice
468 [ parseRule r
469 | r <- tbl & Map.toDescList
470 ]
471 {-
472 <|> P.choice
473 [ parseRule
474 ( RuleLexemes $
475 rulePat & unRuleLexemes <&> \case
476 LexemeChar c -> LexemeChar (c & Char.toUpper)
477 x -> x
478 , curRule
479 )
480 | (rulePat, curRule) <- tbl & Map.toDescList
481 ]
482 -}
483 parseRule (rulePat, curRule@Rule{..}) =
484 P.try do
485 let
486 pat = rulePat & unRuleLexemes
487 patSep = (`List.elem` list [LexemeVowel, LexemeSemiVowel, LexemeConsonant, LexemeSilent])
488 -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
489 patBegin = pat & List.dropWhileEnd patSep
490 patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
491 -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
492 P.chunk patBegin
493 inpAfterBegin <- P.getInput
494 unless (List.null patEnd) do
495 inpWithAhead <- parseAhead
496 -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
497 P.setInput inpWithAhead
498 P.chunk patEnd & void
499 -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
500 let lastSound =
501 rulePron
502 & unPronunciations
503 & List.reverse
504 & headMaybe
505 & maybe
506 []
507 ( snd
508 >>> pronunciationIPABroad
509 >>> List.reverse
510 >>> headMaybe
511 >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
512 )
513 P.setInput $ lastSound <> inpAfterBegin
514 return Pron{pronInput = pat, pronRule = curRule}
515 parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
516 parseChar =
517 P.anySingle <&> \case
518 LexemeChar c -> Just $ Left c
519 _ -> Nothing
520 parseAhead :: P.Parsec () [Lexeme] [Lexeme]
521 parseAhead = do
522 nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
523 -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
524 case nextStep of
525 Right (Right Pron{pronInput, pronRule}) -> do
526 let x =
527 pronRule
528 & rulePron
529 & unPronunciations
530 & headMaybe
531 & maybe
532 []
533 ( snd
534 >>> pronunciationIPABroad
535 >>> headMaybe
536 >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
537 )
538 inp <- P.getInput
539 return $ x <> pronInput <> inp
540 Right (Left lex) -> do
541 parseAhead <&> (lex :)
542 Left{} -> P.getInput
543 lexemeHeadSound :: [_] -> [Lexeme]
544 lexemeHeadSound =
545 headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
546 Just IPA.Zero{} -> [LexemeSilent]
547 Just IPA.Vowel{} -> [LexemeVowel]
548 Just (IPA.Consonant consonant) -> do
549 case consonant of
550 IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
551 IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
552 _ -> [LexemeConsonant]
553 _ -> [] -- error
554
555 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
556 runLexer inp = inp & P.runParser lexer "input"
557
558 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
559 exampleLiteralsLexemes ls =
560 ls & foldMap \ExampleLiteral{..} ->
561 unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
562 <> [ LexemeMeaning exampleLiteralMeaning
563 ]
564
565 lexer :: P.Parsec () Text [Lexeme]
566 lexer = do
567 lls <- P.many do
568 P.choice $
569 list
570 [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
571 return [LexemeChar c | c <- cs & Text.unpack]
572 , do
573 cs <- P.takeWhile1P Nothing Char.isLetter
574 mean <- (<|> return []) $ P.try do
575 P.single '{'
576 m <- P.takeWhile1P Nothing (/= '}')
577 P.single '}'
578 return [LexemeMeaning (TextShort.fromText m)]
579 return $
580 LexemeBorder
581 : [LexemeChar c | c <- cs & Text.unpack]
582 <> mean
583 <> [LexemeBorder]
584 , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
585 return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
586 , P.satisfy Char.isSymbol >>= \c ->
587 return [LexemeChar c]
588 , P.satisfy Char.isSeparator >>= \c ->
589 return [LexemeChar c]
590 , P.satisfy Char.isMark >>= \c ->
591 return [LexemeChar c]
592 , P.satisfy Char.isPunctuation >>= \c ->
593 return [LexemeChar c]
594 ]
595 P.eof
596 return $ mconcat lls
597
598 words :: [Either Char Pron] -> [[Either Char Pron]]
599 words [] = []
600 words prons = word0 : words next
601 where
602 (word0, rest) = prons & List.span (isSep >>> not)
603 (_sep, next) = rest & List.span isSep
604 isSep = \case
605 Left c | c & Char.isSpace -> True
606 _ -> False
607
608
609 {-
610 case statePats st & Map.lookup k of
611 Nothing -> st
612 Just (PatTree pats) ->
613 loop st {statePats = pats, stateBuffer = k : stateBuffer st}
614 Just (PatEnd end) ->
615 loop st { statePats = initPats
616 , stateBuffer = []
617 , stateInput = stateInput st
618 & LZ.insert Inp
619 { inpPats = stateBuffer st & List.reverse
620 , inpPronunciations = end
621 }
622 & LZ.right
623 }
624 -}
625
626 {-
627 parse :: PatTree -> Text -> [Inp]
628 parse initPats input =
629 let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
630 runInp [] initPats inpZip & LZ.toList
631 where
632 charToInp :: Char -> Inp
633 charToInp c = Inp
634 { inpPats = [PosNext (PatternChar c)]
635 , inpPronunciations = []
636 }
637 runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
638 runInp oks pat inp =
639 traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
640 case pat of
641 PatEnd end ->
642 -- the pattern ends
643 inp
644 & LZ.insert Inp
645 { inpPats = oks & List.reverse
646 , inpPronunciations = end
647 }
648 & LZ.right
649 & runInp [] initPats
650 PatTree pats ->
651 -- the pattern may go on
652 case inp & LZ.safeCursor of
653 Nothing ->
654 inp
655 & runPat [] oks [PosNext PatternLexicalBorder] pats
656 Just cur ->
657 inp & LZ.delete
658 & runPat [] oks (inpPats cur & List.sort) pats
659
660 runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
661 runPat kos oks todos pats inp =
662 traceShow ( "runPat"::Text
663 , ("kos"::Text) := kos
664 , ("oks"::Text) := oks
665 , ("todos"::Text) := todos
666 , ("cur"::Text) :=LZ.safeCursor inp
667 ) $
668 case todos of
669 [] | LZ.endp inp -> inp
670 & runInp kos (PatEnd [])
671 & runInp oks (PatEnd [])
672 [] ->
673 -- nothing left to advance the pattern
674 --traceShow ("runPat/[]"::Text) $
675 inp
676 & (if null kos then id else runInp kos (PatEnd []))
677 & runInp oks (PatTree pats)
678 k:ks ->
679 case pats & Map.lookup k of
680 -- the pattern ends
681 Just (PatEnd end) ->
682 --traceShow ("runPat/End"::Text) $
683 -- ks forgotten
684 inp
685 & (if null kos then id else runInp kos (PatEnd []))
686 & runInp (k:oks) (PatEnd end)
687 -- the pattern advances
688 Just (PatTree nextPats) ->
689 --traceShow ("runPat/Node"::Text) $
690 inp & runPat kos (k:oks) ks nextPats
691 -- the pattern does not advance
692 Nothing ->
693 inp & runPat (k:kos) oks ks pats
694 -}
695
696
697 data Lexeme
698 = LexemeBorder
699 | LexemeVowel
700 | LexemeSemiVowel
701 | LexemeConsonant
702 | LexemeDoubleConsonant
703 | LexemeSilent
704 | LexemeMeaning ShortText
705 | -- | `LexemeChar` is last to have priority when using `Map.toDescList`
706 LexemeChar Char
707 deriving (Eq, Ord, Show)
708
709 -- data Sound
710 -- = SoundVowel
711 -- | SoundConsonant
712 -- deriving (Eq, Ord, Show)
713
714 {-
715 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
716 deriving (Eq, Ord, Show)
717 instance P.Stream Lexemes where
718 type Token Lexemes = Lexeme
719 type Tokens Lexemes = Lexemes
720 tokensToChunk _px = Lexemes
721 chunkToTokens _px = unLexemes
722 chunkLength _px = unLexemes >>> List.length
723 chunkEmpty _px = unLexemes >>> List.null
724 take1_ = unLexemes >>> P.take1_ >>> coerce
725 takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
726 takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
727
728 instance IsString Lexemes where
729 fromString s =
730 s
731 & Text.pack
732 & runLexer
733 & either
734 errorShow
735 ((`appEndo` []) >>> Lexemes)
736 -}
737
738 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
739 deriving (Eq, Ord, Show)
740 instance HasTypeDefault RuleLexemes where
741 typeDefault = RuleLexemes typeDefault
742 instance Semigroup RuleLexemes where
743 RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
744 instance Monoid RuleLexemes where
745 mempty = RuleLexemes mempty
746 instance IsList RuleLexemes where
747 type Item RuleLexemes = Lexeme
748 fromList = RuleLexemes
749 toList = unRuleLexemes
750 instance IsString RuleLexemes where
751 fromString s =
752 s
753 & Text.pack
754 & runLexer
755 & either
756 errorShow
757 ( List.dropWhileEnd (== LexemeBorder)
758 >>> List.dropWhile (== LexemeBorder)
759 >>> RuleLexemes
760 )
761
762 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
763 deriving (Eq, Ord, Show)
764 instance HasTypeDefault InputLexemes where
765 typeDefault = InputLexemes typeDefault
766 instance Semigroup InputLexemes where
767 InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
768 instance Monoid InputLexemes where
769 mempty = InputLexemes mempty
770 instance IsList InputLexemes where
771 type Item InputLexemes = Lexeme
772 fromList = InputLexemes
773 toList = unInputLexemes
774 instance IsString InputLexemes where
775 fromString s =
776 s
777 & Text.pack
778 & runLexer
779 & either errorShow InputLexemes
780
781 instance P.ShowErrorComponent () where
782 showErrorComponent = show
783 errorComponentLen _ = 2
784 instance P.VisualStream [Lexeme] where
785 showTokens _s = show
786 tokensLength _s xs = xs <&> (show >>> List.length) & sum
787 instance P.TraversableStream [Lexeme] where
788 reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
789
790 data LexemeTag
791 = LexemeTagLetter
792 | LexemeTagSpace
793 | LexemeTagPunctuation
794 | LexemeTagSeparator
795 | LexemeTagMark
796 | LexemeTagSymbol
797 | LexemeTagDefinition
798 | LexemeTagBorder
799 deriving (Eq, Ord, Show)
800 deriving instance Ord (IPA.Syllable [])
801 deriving instance Ord IPA.SuprasegmentalFeature
802 deriving instance Ord IPA.SegmentalFeature
803 deriving instance Ord IPA.Sibilance
804 deriving instance Ord IPA.Manner
805 deriving instance Ord IPA.Phonation
806 deriving instance Ord IPA.Roundedness
807 deriving instance Ord IPA.Height
808 deriving instance Ord IPA.Vowel
809 deriving instance Ord IPA.Consonant
810 deriving instance Ord IPA.Segment
811
812 {-
813 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
814 tableToMatch tbl = loop
815 where
816 loop prevBorder = \case
817 InputText inp ->
818 [ (matchingLength, )
819 | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
820 , let matchingLength = transMatchingLength input trans
821 , 0 < matchingLength || not (isTransConsume trans)
822 , let (inputRead, inputRest) = input & Text.splitAt matchingLength
823 ]
824 & Map.fromListWith (\new old -> old)
825 & Map.lookupMax
826 <&> snd
827
828 -}
829 tableHtml :: Table -> IO HTML.Html
830 tableHtml tbl = do
831 dataPath <- Self.getDataDir <&> File.normalise
832 let title :: String = "LexerDict"
833 let pageOrientation = Paper.PageOrientationPortrait
834 let pageSize = Paper.PageSizeA4
835 let partLangue = LangueFrançais
836 return do
837 HTML.docTypeHtml do
838 HTML.head do
839 HTML.title $ title & HTML.toHtml
840 forM_
841 ( [ "styles/Paper.css"
842 , "styles/French/Lexer.css"
843 , "styles/Rosetta/Reading.css"
844 ]
845 & list
846 )
847 \cssFile ->
848 HTML.link
849 ! HA.rel "stylesheet"
850 ! HA.type_ "text/css"
851 ! HA.href (dataPath </> cssFile & HTML.toValue)
852 HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
853 -- HTML.styleCSS $ pageDifficulties & difficultyCSS
854 HTML.body
855 ! classes ["A4", "french-lexer"]
856 $ do
857 "\n"
858 let rulesChunks = tbl & Map.toList & chunksOf 50
859 forM_ rulesChunks \rules ->
860 HTML.section
861 ! classes
862 [ "sheet"
863 ]
864 ! styles
865 []
866 $ do
867 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
868 "\n"
869 HTML.div
870 ! classes
871 [ "dict-entry"
872 , if even ruleIndex then "even" else "odd"
873 ]
874 ! styles
875 []
876 $ do
877 "\n"
878 HTML.div
879 ! classes
880 [ "dict-key"
881 , "lang-" <> className partLangue
882 ]
883 ! styles
884 []
885 -- "grid-template-columns" :=
886 -- (0.5 & cm & HTML.toCSS)
887 -- & List.replicate lexerDictMaxKeyLength
888 -- & List.unwords
889
890 $ do
891 forM_ (["model"] :: [String]) \rowKind -> do
892 forM_ (rulePat & unRuleLexemes) \ruleChar -> do
893 -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
894 case ruleChar of
895 LexemeChar c -> do
896 HTML.span
897 ! classes
898 [ "dict-key-cell"
899 , rowKind
900 -- , "script-" <> className uniScript
901 ]
902 $ do
903 c & HTML.toHtml
904 _ -> ""
905 HTML.div
906 ! classes
907 [ "dict-pronunciation"
908 ]
909 $ do
910 -- HTML.span ! classes ["arrow"] $ "→"
911 case rulePron of
912 Pronunciations
913 { unPronunciations =
914 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
915 } -> ""
916 Pronunciations{unPronunciations = is} ->
917 is
918 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
919 & HTML.toHtml
920 HTML.div
921 ! classes
922 [ "dict-lexeme"
923 ]
924 $ do
925 -- HTML.span ! classes ["arrow"] $ "→"
926 forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
927 HTML.span
928 ! classes
929 []
930 $ do
931 case pronunciationIPABroad of
932 [] -> pronunciationText & HTML.toHtml
933 _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
934 "; "