]> Git — Sourcephile - julm/worksheets.git/blob - src/Language/Pronunciation.hs
wip
[julm/worksheets.git] / src / Language / Pronunciation.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5
6 module Language.Pronunciation where
7
8 import Control.Applicative (Alternative (..))
9 import Control.Monad.Combinators qualified as P
10 import Control.Monad.Trans.Class qualified as MT
11 import Control.Monad.Trans.State qualified as MT
12 import Data.List qualified as List
13 import Data.List.Zipper qualified as LZ
14 import Data.Map.Strict qualified as Map
15 import Data.Set qualified as Set
16 import Data.Text qualified as Text
17 import Data.Text.Short qualified as TextShort
18 import Data.Traversable (traverse)
19 import Language
20 import Paths_worksheets qualified as Self
21 import System.FilePath.Posix ((</>))
22 import System.FilePath.Posix qualified as File
23 import Text.Blaze.Html5.Attributes qualified as HA
24 import Text.Megaparsec qualified as P
25 import Worksheets.Utils.Char qualified as Char
26 import Worksheets.Utils.HTML (className, classes, styles, (!))
27 import Worksheets.Utils.HTML qualified as HTML
28 import Worksheets.Utils.IPA qualified as IPA
29 import Worksheets.Utils.Paper qualified as Paper
30 import Worksheets.Utils.Prelude
31
32 data Pronunciation = Pronunciation
33 { pronunciationIPABroad :: [IPA.Syllable []]
34 , pronunciationText :: Text
35 }
36 deriving (Eq, Ord, Show)
37 instance Semigroup Pronunciation where
38 x <> y =
39 Pronunciation
40 { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
41 , pronunciationText =
42 [pronunciationText x, pronunciationText y]
43 & List.filter (/= "")
44 & Text.intercalate "."
45 }
46 instance IsList Pronunciation where
47 type Item Pronunciation = IPA.Syllable []
48 toList = pronunciationIPABroad
49
50 -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
51 fromList l =
52 Pronunciation
53 { pronunciationIPABroad = ipa
54 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
55 }
56 where
57 ipa =
58 l
59 & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
60 & fromList
61 instance IsString Pronunciation where
62 fromString s =
63 Pronunciation
64 { pronunciationIPABroad = ipa
65 , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
66 }
67 where
68 ipa =
69 s
70 & Text.pack
71 & IPA.parseSyllables @[]
72 & either errorShow id
73
74 {-
75 data PronunciationKey = PronunciationKey
76 { pronunciationKeyText :: Text
77 , pronunciationKeyPron :: Pronunciation
78 , pronunciationKey
79 }
80 -}
81
82 newtype Pronunciations = Pronunciations
83 { unPronunciations :: [(RuleLexemes, Pronunciation)]
84 }
85 deriving (Eq, Ord)
86 deriving newtype (Show)
87 deriving newtype (Semigroup)
88 deriving newtype (Monoid)
89 joinPronunciations :: Pronunciations -> Pronunciations
90 joinPronunciations (Pronunciations ps) =
91 Pronunciations
92 [ input :=
93 Pronunciation
94 { pronunciationIPABroad = ipa
95 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
96 }
97 ]
98 where
99 input :: RuleLexemes
100 ipa :: [IPA.Syllable []]
101 (input, ipa) =
102 ps
103 & List.foldr
104 ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
105 case pronunciationIPABroad of
106 [] -> (inp <> suffix, IPA.Syllable [], acc)
107 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
108 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
109 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
110 _ -> errorShow pronunciationIPABroad
111 )
112 ( ""
113 , IPA.Syllable []
114 , []
115 )
116 & ( \(i, l, acc) ->
117 ( i
118 , glueSyllableToTheRight l acc
119 & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
120 )
121 )
122 glueSyllableToTheRight ::
123 IPA.Syllable [] ->
124 [IPA.Syllable []] ->
125 [IPA.Syllable []]
126 glueSyllableToTheRight x y =
127 case y of
128 [] -> [x]
129 [yL] -> [x <> yL]
130 yL : yR -> x <> yL : yR
131 instance IsList Pronunciations where
132 type Item Pronunciations = (RuleLexemes, Pronunciation)
133 toList = unPronunciations
134 fromList l = Pronunciations{unPronunciations = l & fromList}
135
136 {-
137 instance IsString Pronunciations where
138 fromString = \case
139 "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
140 s -> Pronunciations (s & Text.pack) $
141 fromString s
142 & IPA.parseSyllables @[]
143 & either errorShow id
144 -}
145 data ExampleLiteral = ExampleLiteral
146 { exampleLiteralText :: ShortText
147 , exampleLiteralTags :: Set LiteralTag
148 , exampleLiteralMeaning :: ShortText
149 }
150 deriving (Eq, Ord, Show)
151 instance IsString ExampleLiteral where
152 fromString s =
153 ExampleLiteral
154 { exampleLiteralText = s & fromString
155 , exampleLiteralTags = Set.empty
156 , exampleLiteralMeaning = ""
157 }
158 data LiteralTag
159 = LiteralTagOccurence
160 | LiteralTagMeta
161 | LiteralTagSilent
162 deriving (Eq, Ord, Show)
163 exampleLiteralsText :: [ExampleLiteral] -> ShortText
164 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
165
166 hyphen =
167 ExampleLiteral
168 { exampleLiteralText = "-"
169 , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
170 , exampleLiteralMeaning = ""
171 }
172 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
173 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
174
175 type SyllableText = ShortText
176 type SyllableBroad = IPA.Syllable []
177 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
178
179 data Adjacent
180 = AdjacentBorder
181 | AdjacentVowel
182 | AdjacentConsonant
183 deriving (Eq, Ord, Show)
184 data Variant
185 = VariantDefinition Text
186 | VariantStress
187 deriving (Eq, Ord, Show)
188 data Rule = Rule
189 { -- , ruleStress :: Bool
190 -- , ruleDefinition :: Maybe Text
191
192 -- [ "e" := ["ɛ"] , "x" := ["g","z"] , "er" := ["ɛʁ"] , "cice" := ["sis"] ]
193 -- [ "exercice" := ["ɛg.zɛʁ.sis"]
194 -- ]
195 rulePron :: Pronunciations
196 , ruleExamples :: Map InputLexemes Pronunciation
197 }
198 deriving (Eq, Ord, Show)
199
200 rule =
201 Rule
202 { rulePron = Pronunciations{unPronunciations = []}
203 , ruleExamples = mempty
204 }
205 space = rule
206 word
207 , begining
208 , ending ::
209 RuleLexemes -> RuleLexemes
210 word = begining >>> ending
211 begining = after [LexemeBorder]
212 ending = before [LexemeBorder]
213 before ls r = RuleLexemes (unRuleLexemes r <> ls)
214 after ls r = RuleLexemes (ls <> unRuleLexemes r)
215 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
216
217 type Table = Map RuleLexemes Rule
218 examples :: Table -> Map InputLexemes Pronunciation
219 examples tbl =
220 [ v & ruleExamples
221 | v <- tbl & Map.elems
222 ]
223 & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
224
225 data Pron = Pron
226 { pronInput :: [Lexeme]
227 , pronRule :: Rule
228 }
229 deriving (Eq, Show)
230
231 data Syl = Syl
232 { sylText :: Text
233 , sylDependsOnBefore :: Bool
234 , sylDependsOnAfter :: Bool
235 , sylDependsOnMeaning :: Bool
236 , sylSound :: Text -- [IPA.Syllable []]
237 , sylIndex :: Int
238 , sylSilent :: Bool
239 , sylSplit :: Bool
240 }
241 deriving (Eq, Show)
242
243 addIndexes :: [[Either Char Pron]] -> [[Syl]]
244 addIndexes = go 0
245 where
246 go _idx [] = []
247 go idx (prons : next) = List.reverse prons' : go idx' next
248 where
249 (idx', prons') =
250 prons
251 & List.foldl'
252 ( \(i, is) -> \case
253 Left c ->
254 ( i
255 , Syl
256 { sylText = Text.singleton c
257 , sylDependsOnAfter = False
258 , sylDependsOnBefore = False
259 , sylDependsOnMeaning = False
260 , sylSound = []
261 , sylIndex = i
262 , sylSilent = True
263 , sylSplit = False
264 }
265 : is
266 )
267 Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
268 ps
269 & List.foldl'
270 ( \(j, js) (t, Pronunciation{..}) ->
271 let sylText = t & unRuleLexemes & lexemesChars & Text.pack
272 in case pronunciationIPABroad of
273 []
274 | not (Text.null pronunciationText) ->
275 ( j + 1
276 , Syl
277 { sylText
278 , sylSound = pronunciationText
279 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
280 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
281 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
282 , sylIndex = j + 1
283 , sylSilent = False
284 , sylSplit = False
285 }
286 : js
287 )
288 [IPA.Syllable []]
289 | Text.null pronunciationText ->
290 ( j
291 , case js of
292 [] ->
293 Syl
294 { sylText
295 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
296 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
297 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
298 , sylSound = ""
299 , sylIndex = j
300 , sylSilent = True
301 , sylSplit = False
302 }
303 : js
304 j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
305 )
306 syls
307 | (syls & all IPA.isSilent) && Text.null pronunciationText ->
308 ( j
309 , Syl
310 { sylText
311 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
312 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
313 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
314 , sylSound = ""
315 , sylIndex = j
316 , sylSilent = True
317 , sylSplit = False
318 }
319 : js
320 )
321 _syls@[_] ->
322 ( j + 1
323 , Syl
324 { sylText
325 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
326 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
327 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
328 , sylSound = pronunciationText
329 , sylIndex = j + 1
330 , sylSilent = False
331 , sylSplit = False
332 }
333 : js
334 )
335 _syls@[_, _] ->
336 ( j
337 , Syl
338 { sylText
339 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
340 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
341 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
342 , sylSound = pronunciationText
343 , sylIndex = j
344 , sylSilent = False
345 , sylSplit = True
346 }
347 : js
348 )
349 syls -> errorShow syls
350 )
351 (i, is)
352 where
353 sylDependsOnAfter =
354 List.reverse >>> \case
355 LexemeBorder : _ -> True
356 LexemeSilent : _ -> True
357 LexemeConsonant : _ -> True
358 LexemeDoubleConsonant : _ -> True
359 LexemeVowel : _ -> True
360 LexemeSemiVowel : _ -> True
361 _ -> False
362 sylDependsOnBefore =
363 \case
364 LexemeBorder : _ -> True
365 LexemeSilent : _ -> True
366 LexemeConsonant : _ -> True
367 LexemeDoubleConsonant : _ -> True
368 LexemeVowel : _ -> True
369 LexemeSemiVowel : _ -> True
370 _ -> False
371 sylDependsOnMeaning =
372 List.reverse >>> \case
373 LexemeMeaning{} : _ -> True
374 _ -> False
375 )
376 (idx, [])
377
378 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
379 withCapital =
380 foldMap \(RuleLexemes pat, rul) ->
381 [ (RuleLexemes pat, rul)
382 ,
383 ( RuleLexemes (withCapitalLexemes pat)
384 , rul
385 { rulePron = rul & rulePron & withCapitalPronunciations
386 , ruleExamples =
387 rul
388 & ruleExamples
389 & Map.mapKeys
390 ( unInputLexemes
391 >>> withCapitalLexemes
392 >>> InputLexemes
393 )
394 }
395 )
396 ]
397 where
398 withCapitalPronunciations (Pronunciations []) = Pronunciations []
399 withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
400 Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
401 withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
402 withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
403 withCapitalLexemes [] = []
404
405 lexemesChars :: [Lexeme] -> [Char]
406 lexemesChars p =
407 p & foldMap \case
408 LexemeChar c -> [c]
409 _ -> []
410
411 run ::
412 Table ->
413 Text ->
414 Either
415 ( Either
416 (P.ParseErrorBundle Text ())
417 (P.ParseErrorBundle [Lexeme] ())
418 )
419 [Either Char Pron]
420 run rules inp =
421 inp
422 & runLexer
423 & either (Left . Left) \lexs ->
424 lexs
425 & runParser rules
426 & either (Left . Right) Right
427
428 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
429 runParser tbl inp = inp & P.runParser (parser tbl) "input"
430
431 parseLiterals ::
432 Table ->
433 [ExampleLiteral] ->
434 Either
435 ( Either
436 (P.ParseErrorBundle Text ())
437 (P.ParseErrorBundle [Lexeme] ())
438 )
439 [Either Char Pron]
440 parseLiterals rules inp =
441 inp
442 & traverse
443 ( \ExampleLiteral{..} ->
444 exampleLiteralText
445 & TextShort.toText
446 & runLexer
447 <&> ( <>
448 [ LexemeMeaning exampleLiteralMeaning
449 | exampleLiteralMeaning & TextShort.null & not
450 ]
451 )
452 )
453 & either (Left . Left) \lexs ->
454 lexs
455 & mconcat
456 & runParser rules
457 & either (Left . Right) Right
458
459 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
460 parser tbl = do
461 res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
462 P.eof
463 return $ res & catMaybes
464 where
465 -- Match one of the rules, trying longuest first
466 parseRules :: P.Parsec () [Lexeme] Pron
467 parseRules =
468 P.choice
469 [ parseRule r
470 | r <- tbl & Map.toDescList
471 ]
472 {-
473 <|> P.choice
474 [ parseRule
475 ( RuleLexemes $
476 rulePat & unRuleLexemes <&> \case
477 LexemeChar c -> LexemeChar (c & Char.toUpper)
478 x -> x
479 , curRule
480 )
481 | (rulePat, curRule) <- tbl & Map.toDescList
482 ]
483 -}
484 parseRule (rulePat, curRule@Rule{..}) =
485 P.try do
486 let
487 pat = rulePat & unRuleLexemes
488 patSep = (`List.elem` list [LexemeVowel, LexemeSemiVowel, LexemeConsonant, LexemeSilent])
489 -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
490 patBegin = pat & List.dropWhileEnd patSep
491 patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
492 -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
493 P.chunk patBegin
494 inpAfterBegin <- P.getInput
495 unless (List.null patEnd) do
496 inpWithAhead <- parseAhead
497 -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
498 P.setInput inpWithAhead
499 P.chunk patEnd & void
500 -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
501 let lastSound =
502 rulePron
503 & unPronunciations
504 & List.reverse
505 & headMaybe
506 & maybe
507 []
508 ( snd
509 >>> pronunciationIPABroad
510 >>> List.reverse
511 >>> headMaybe
512 >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
513 )
514 P.setInput $ lastSound <> inpAfterBegin
515 return Pron{pronInput = pat, pronRule = curRule}
516 parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
517 parseChar =
518 P.anySingle <&> \case
519 LexemeChar c -> Just $ Left c
520 _ -> Nothing
521 parseAhead :: P.Parsec () [Lexeme] [Lexeme]
522 parseAhead = do
523 nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
524 -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
525 case nextStep of
526 Right (Right Pron{pronInput, pronRule}) -> do
527 let x =
528 pronRule
529 & rulePron
530 & unPronunciations
531 & headMaybe
532 & maybe
533 []
534 ( snd
535 >>> pronunciationIPABroad
536 >>> headMaybe
537 >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
538 )
539 inp <- P.getInput
540 return $ x <> pronInput <> inp
541 Right (Left lex) -> do
542 parseAhead <&> (lex :)
543 Left{} -> P.getInput
544 lexemeHeadSound :: [_] -> [Lexeme]
545 lexemeHeadSound =
546 headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
547 Just IPA.Zero{} -> [LexemeSilent]
548 Just IPA.Vowel{} -> [LexemeVowel]
549 Just (IPA.Consonant consonant) -> do
550 case consonant of
551 IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
552 IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
553 _ -> [LexemeConsonant]
554 _ -> [] -- error
555
556 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
557 runLexer inp = inp & P.runParser lexer "input"
558
559 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
560 exampleLiteralsLexemes ls =
561 ls & foldMap \ExampleLiteral{..} ->
562 unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
563 <> [ LexemeMeaning exampleLiteralMeaning
564 ]
565
566 lexer :: P.Parsec () Text [Lexeme]
567 lexer = do
568 lls <- P.many do
569 P.choice $
570 list
571 [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
572 return [LexemeChar c | c <- cs & Text.unpack]
573 , do
574 cs <- P.takeWhile1P Nothing Char.isLetter
575 mean <- (<|> return []) $ P.try do
576 P.single '{'
577 m <- P.takeWhile1P Nothing (/= '}')
578 P.single '}'
579 return [LexemeMeaning (TextShort.fromText m)]
580 return $
581 LexemeBorder
582 : [LexemeChar c | c <- cs & Text.unpack]
583 <> mean
584 <> [LexemeBorder]
585 , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
586 return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
587 , P.satisfy Char.isSymbol >>= \c ->
588 return [LexemeChar c]
589 , P.satisfy Char.isSeparator >>= \c ->
590 return [LexemeChar c]
591 , P.satisfy Char.isMark >>= \c ->
592 return [LexemeChar c]
593 , P.satisfy Char.isPunctuation >>= \c ->
594 return [LexemeChar c]
595 ]
596 P.eof
597 return $ mconcat lls
598
599 words :: [Either Char Pron] -> [[Either Char Pron]]
600 words [] = []
601 words prons = word0 : words next
602 where
603 (word0, rest) = prons & List.span (isSep >>> not)
604 (_sep, next) = rest & List.span isSep
605 isSep = \case
606 Left c | c & Char.isSpace -> True
607 _ -> False
608
609 {-
610 case statePats st & Map.lookup k of
611 Nothing -> st
612 Just (PatTree pats) ->
613 loop st {statePats = pats, stateBuffer = k : stateBuffer st}
614 Just (PatEnd end) ->
615 loop st { statePats = initPats
616 , stateBuffer = []
617 , stateInput = stateInput st
618 & LZ.insert Inp
619 { inpPats = stateBuffer st & List.reverse
620 , inpPronunciations = end
621 }
622 & LZ.right
623 }
624 -}
625
626 {-
627 parse :: PatTree -> Text -> [Inp]
628 parse initPats input =
629 let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
630 runInp [] initPats inpZip & LZ.toList
631 where
632 charToInp :: Char -> Inp
633 charToInp c = Inp
634 { inpPats = [PosNext (PatternChar c)]
635 , inpPronunciations = []
636 }
637 runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
638 runInp oks pat inp =
639 traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
640 case pat of
641 PatEnd end ->
642 -- the pattern ends
643 inp
644 & LZ.insert Inp
645 { inpPats = oks & List.reverse
646 , inpPronunciations = end
647 }
648 & LZ.right
649 & runInp [] initPats
650 PatTree pats ->
651 -- the pattern may go on
652 case inp & LZ.safeCursor of
653 Nothing ->
654 inp
655 & runPat [] oks [PosNext PatternLexicalBorder] pats
656 Just cur ->
657 inp & LZ.delete
658 & runPat [] oks (inpPats cur & List.sort) pats
659
660 runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
661 runPat kos oks todos pats inp =
662 traceShow ( "runPat"::Text
663 , ("kos"::Text) := kos
664 , ("oks"::Text) := oks
665 , ("todos"::Text) := todos
666 , ("cur"::Text) :=LZ.safeCursor inp
667 ) $
668 case todos of
669 [] | LZ.endp inp -> inp
670 & runInp kos (PatEnd [])
671 & runInp oks (PatEnd [])
672 [] ->
673 -- nothing left to advance the pattern
674 --traceShow ("runPat/[]"::Text) $
675 inp
676 & (if null kos then id else runInp kos (PatEnd []))
677 & runInp oks (PatTree pats)
678 k:ks ->
679 case pats & Map.lookup k of
680 -- the pattern ends
681 Just (PatEnd end) ->
682 --traceShow ("runPat/End"::Text) $
683 -- ks forgotten
684 inp
685 & (if null kos then id else runInp kos (PatEnd []))
686 & runInp (k:oks) (PatEnd end)
687 -- the pattern advances
688 Just (PatTree nextPats) ->
689 --traceShow ("runPat/Node"::Text) $
690 inp & runPat kos (k:oks) ks nextPats
691 -- the pattern does not advance
692 Nothing ->
693 inp & runPat (k:kos) oks ks pats
694 -}
695
696 data Lexeme
697 = LexemeBorder
698 | LexemeVowel
699 | LexemeSemiVowel
700 | LexemeConsonant
701 | LexemeDoubleConsonant
702 | LexemeSilent
703 | LexemeMeaning ShortText
704 | -- | `LexemeChar` is last to have priority when using `Map.toDescList`
705 LexemeChar Char
706 deriving (Eq, Ord, Show)
707
708 -- data Sound
709 -- = SoundVowel
710 -- | SoundConsonant
711 -- deriving (Eq, Ord, Show)
712
713 {-
714 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
715 deriving (Eq, Ord, Show)
716 instance P.Stream Lexemes where
717 type Token Lexemes = Lexeme
718 type Tokens Lexemes = Lexemes
719 tokensToChunk _px = Lexemes
720 chunkToTokens _px = unLexemes
721 chunkLength _px = unLexemes >>> List.length
722 chunkEmpty _px = unLexemes >>> List.null
723 take1_ = unLexemes >>> P.take1_ >>> coerce
724 takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
725 takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
726
727 instance IsString Lexemes where
728 fromString s =
729 s
730 & Text.pack
731 & runLexer
732 & either
733 errorShow
734 ((`appEndo` []) >>> Lexemes)
735 -}
736
737 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
738 deriving (Eq, Ord, Show)
739 instance HasTypeDefault RuleLexemes where
740 typeDefault = RuleLexemes typeDefault
741 instance Semigroup RuleLexemes where
742 RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
743 instance Monoid RuleLexemes where
744 mempty = RuleLexemes mempty
745 instance IsList RuleLexemes where
746 type Item RuleLexemes = Lexeme
747 fromList = RuleLexemes
748 toList = unRuleLexemes
749 instance IsString RuleLexemes where
750 fromString s =
751 s
752 & Text.pack
753 & runLexer
754 & either
755 errorShow
756 ( List.dropWhileEnd (== LexemeBorder)
757 >>> List.dropWhile (== LexemeBorder)
758 >>> RuleLexemes
759 )
760
761 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
762 deriving (Eq, Ord, Show)
763 instance HasTypeDefault InputLexemes where
764 typeDefault = InputLexemes typeDefault
765 instance Semigroup InputLexemes where
766 InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
767 instance Monoid InputLexemes where
768 mempty = InputLexemes mempty
769 instance IsList InputLexemes where
770 type Item InputLexemes = Lexeme
771 fromList = InputLexemes
772 toList = unInputLexemes
773 instance IsString InputLexemes where
774 fromString s =
775 s
776 & Text.pack
777 & runLexer
778 & either errorShow InputLexemes
779
780 instance P.ShowErrorComponent () where
781 showErrorComponent = show
782 errorComponentLen _ = 2
783 instance P.VisualStream [Lexeme] where
784 showTokens _s = show
785 tokensLength _s xs = xs <&> (show >>> List.length) & sum
786 instance P.TraversableStream [Lexeme] where
787 reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
788
789 data LexemeTag
790 = LexemeTagLetter
791 | LexemeTagSpace
792 | LexemeTagPunctuation
793 | LexemeTagSeparator
794 | LexemeTagMark
795 | LexemeTagSymbol
796 | LexemeTagDefinition
797 | LexemeTagBorder
798 deriving (Eq, Ord, Show)
799 deriving instance Ord (IPA.Syllable [])
800 deriving instance Ord IPA.SuprasegmentalFeature
801 deriving instance Ord IPA.SegmentalFeature
802 deriving instance Ord IPA.Sibilance
803 deriving instance Ord IPA.Manner
804 deriving instance Ord IPA.Phonation
805 deriving instance Ord IPA.Roundedness
806 deriving instance Ord IPA.Height
807 deriving instance Ord IPA.Vowel
808 deriving instance Ord IPA.Consonant
809 deriving instance Ord IPA.Segment
810
811 {-
812 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
813 tableToMatch tbl = loop
814 where
815 loop prevBorder = \case
816 InputText inp ->
817 [ (matchingLength, )
818 | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
819 , let matchingLength = transMatchingLength input trans
820 , 0 < matchingLength || not (isTransConsume trans)
821 , let (inputRead, inputRest) = input & Text.splitAt matchingLength
822 ]
823 & Map.fromListWith (\new old -> old)
824 & Map.lookupMax
825 <&> snd
826
827 -}
828 tableHtml :: Table -> IO HTML.Html
829 tableHtml tbl = do
830 dataPath <- Self.getDataDir <&> File.normalise
831 let title :: String = "LexerDict"
832 let pageOrientation = Paper.PageOrientationPortrait
833 let pageSize = Paper.PageSizeA4
834 let partLangue = LangueFrançais
835 return do
836 HTML.docTypeHtml do
837 HTML.head do
838 HTML.title $ title & HTML.toHtml
839 forM_
840 ( [ "styles/Paper.css"
841 , "styles/French/Lexer.css"
842 , "styles/Rosetta/Reading.css"
843 ]
844 & list
845 )
846 \cssFile ->
847 HTML.link
848 ! HA.rel "stylesheet"
849 ! HA.type_ "text/css"
850 ! HA.href (dataPath </> cssFile & HTML.toValue)
851 HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
852 -- HTML.styleCSS $ pageDifficulties & difficultyCSS
853 HTML.body
854 ! classes ["A4", "french-lexer"]
855 $ do
856 "\n"
857 let rulesChunks = tbl & Map.toList & chunksOf 50
858 forM_ rulesChunks \rules ->
859 HTML.section
860 ! classes
861 [ "sheet"
862 ]
863 ! styles
864 []
865 $ do
866 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
867 "\n"
868 HTML.div
869 ! classes
870 [ "dict-entry"
871 , if even ruleIndex then "even" else "odd"
872 ]
873 ! styles
874 []
875 $ do
876 "\n"
877 HTML.div
878 ! classes
879 [ "dict-key"
880 , "lang-" <> className partLangue
881 ]
882 ! styles
883 []
884 -- "grid-template-columns" :=
885 -- (0.5 & cm & HTML.toCSS)
886 -- & List.replicate lexerDictMaxKeyLength
887 -- & List.unwords
888
889 $ do
890 forM_ (["model"] :: [String]) \rowKind -> do
891 forM_ (rulePat & unRuleLexemes) \ruleChar -> do
892 -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
893 case ruleChar of
894 LexemeChar c -> do
895 HTML.span
896 ! classes
897 [ "dict-key-cell"
898 , rowKind
899 -- , "script-" <> className uniScript
900 ]
901 $ do
902 c & HTML.toHtml
903 _ -> ""
904 HTML.div
905 ! classes
906 [ "dict-pronunciation"
907 ]
908 $ do
909 -- HTML.span ! classes ["arrow"] $ "→"
910 case rulePron of
911 Pronunciations
912 { unPronunciations =
913 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
914 } -> ""
915 Pronunciations{unPronunciations = is} ->
916 is
917 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
918 & HTML.toHtml
919 HTML.div
920 ! classes
921 [ "dict-lexeme"
922 ]
923 $ do
924 -- HTML.span ! classes ["arrow"] $ "→"
925 forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
926 HTML.span
927 ! classes
928 []
929 $ do
930 case pronunciationIPABroad of
931 [] -> pronunciationText & HTML.toHtml
932 _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
933 "; "