]> Git — Sourcephile - julm/worksheets.git/blob - src/Language/Pronunciation.hs
update
[julm/worksheets.git] / src / Language / Pronunciation.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5 {-# OPTIONS_GHC -Wno-orphans #-}
6
7 module Language.Pronunciation where
8
9 import Control.Applicative (Alternative (..))
10 import Control.Monad.Combinators qualified as P
11 import Control.Monad.Trans.Class qualified as MT
12 import Control.Monad.Trans.State qualified as MT
13 import Data.List qualified as List
14 import Data.List.Zipper qualified as LZ
15 import Data.Map.Strict qualified as Map
16 import Data.Set qualified as Set
17 import Data.Text qualified as Text
18 import Data.Text.Short qualified as TextShort
19 import Data.Traversable (traverse)
20 import Language
21 import Paths_worksheets qualified as Self
22 import System.FilePath.Posix ((</>))
23 import System.FilePath.Posix qualified as File
24 import Text.Blaze.Html5.Attributes qualified as HA
25 import Text.Megaparsec qualified as P
26 import Worksheets.Utils.Char qualified as Char
27 import Worksheets.Utils.HTML (className, classes, styles, (!))
28 import Worksheets.Utils.HTML qualified as HTML
29 import Worksheets.Utils.IPA qualified as IPA
30 import Worksheets.Utils.Paper qualified as Paper
31 import Worksheets.Utils.Prelude
32
33 data Pronunciation = Pronunciation
34 { pronunciationIPABroad :: [IPA.Syllable []]
35 , pronunciationText :: Text
36 }
37 deriving (Eq, Ord, Show)
38 instance Semigroup Pronunciation where
39 x <> y =
40 Pronunciation
41 { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
42 , pronunciationText =
43 [pronunciationText x, pronunciationText y]
44 & List.filter (/= "")
45 & Text.intercalate "."
46 }
47 instance IsList Pronunciation where
48 type Item Pronunciation = IPA.Syllable []
49 toList = pronunciationIPABroad
50
51 -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
52 fromList l =
53 Pronunciation
54 { pronunciationIPABroad = ipa
55 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
56 }
57 where
58 ipa =
59 l
60 & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
61 & fromList
62 instance IsString Pronunciation where
63 fromString s =
64 Pronunciation
65 { pronunciationIPABroad = ipa
66 , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
67 }
68 where
69 ipa =
70 s
71 & Text.pack
72 & IPA.parseSyllables @[]
73 & either errorShow id
74
75 {-
76 data PronunciationKey = PronunciationKey
77 { pronunciationKeyText :: Text
78 , pronunciationKeyPron :: Pronunciation
79 , pronunciationKey
80 }
81 -}
82
83 newtype Pronunciations = Pronunciations
84 { unPronunciations :: [(RuleLexemes, Pronunciation)]
85 }
86 deriving (Eq, Ord)
87 deriving newtype (Show)
88 deriving newtype (Semigroup)
89 deriving newtype (Monoid)
90 joinPronunciations :: Pronunciations -> Pronunciations
91 joinPronunciations (Pronunciations ps) =
92 Pronunciations
93 [ input :=
94 Pronunciation
95 { pronunciationIPABroad = ipa
96 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
97 }
98 ]
99 where
100 input :: RuleLexemes
101 ipa :: [IPA.Syllable []]
102 (input, ipa) =
103 ps
104 & List.foldr
105 ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
106 case pronunciationIPABroad of
107 [] -> (inp <> suffix, IPA.Syllable [], acc)
108 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
109 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
110 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
111 _ -> errorShow pronunciationIPABroad
112 )
113 ( ""
114 , IPA.Syllable []
115 , []
116 )
117 & ( \(i, l, acc) ->
118 ( i
119 , glueSyllableToTheRight l acc
120 & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
121 )
122 )
123 glueSyllableToTheRight ::
124 IPA.Syllable [] ->
125 [IPA.Syllable []] ->
126 [IPA.Syllable []]
127 glueSyllableToTheRight x y =
128 case y of
129 [] -> [x]
130 [yL] -> [x <> yL]
131 yL : yR -> x <> yL : yR
132 instance IsList Pronunciations where
133 type Item Pronunciations = (RuleLexemes, Pronunciation)
134 toList = unPronunciations
135 fromList l = Pronunciations{unPronunciations = l & fromList}
136
137 {-
138 instance IsString Pronunciations where
139 fromString = \case
140 "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
141 s -> Pronunciations (s & Text.pack) $
142 fromString s
143 & IPA.parseSyllables @[]
144 & either errorShow id
145 -}
146 data ExampleLiteral = ExampleLiteral
147 { exampleLiteralText :: ShortText
148 , exampleLiteralTags :: Set LiteralTag
149 , exampleLiteralMeaning :: ShortText
150 }
151 deriving (Eq, Ord, Show)
152 instance IsString ExampleLiteral where
153 fromString s =
154 ExampleLiteral
155 { exampleLiteralText = s & fromString
156 , exampleLiteralTags = Set.empty
157 , exampleLiteralMeaning = ""
158 }
159 data LiteralTag
160 = LiteralTagOccurence
161 | LiteralTagMeta
162 | LiteralTagSilent
163 deriving (Eq, Ord, Show)
164 exampleLiteralsText :: [ExampleLiteral] -> ShortText
165 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
166
167 hyphen =
168 ExampleLiteral
169 { exampleLiteralText = "-"
170 , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
171 , exampleLiteralMeaning = ""
172 }
173 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
174 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
175
176 type SyllableText = ShortText
177 type SyllableBroad = IPA.Syllable []
178 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
179
180 data Adjacent
181 = AdjacentBorder
182 | AdjacentVowel
183 | AdjacentConsonant
184 deriving (Eq, Ord, Show)
185 data Variant
186 = VariantDefinition Text
187 | VariantStress
188 deriving (Eq, Ord, Show)
189 data Rule = Rule
190 { -- , ruleStress :: Bool
191 -- , ruleDefinition :: Maybe Text
192
193 -- [ "e" := ["ɛ"] , "x" := ["g","z"] , "er" := ["ɛʁ"] , "cice" := ["sis"] ]
194 -- [ "exercice" := ["ɛg.zɛʁ.sis"]
195 -- ]
196 rulePron :: Pronunciations
197 , ruleExamples :: Map InputLexemes Pronunciation
198 }
199 deriving (Eq, Ord, Show)
200
201 rule =
202 Rule
203 { rulePron = Pronunciations{unPronunciations = []}
204 , ruleExamples = mempty
205 }
206 space = rule
207 word
208 , begining
209 , ending ::
210 RuleLexemes -> RuleLexemes
211 word = begining >>> ending
212 begining = after [LexemeBorder]
213 ending = before [LexemeBorder]
214 before ls r = RuleLexemes (unRuleLexemes r <> ls)
215 after ls r = RuleLexemes (ls <> unRuleLexemes r)
216 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
217
218 type Table = Map RuleLexemes Rule
219 examples :: Table -> Map InputLexemes Pronunciation
220 examples tbl =
221 [ v & ruleExamples
222 | v <- tbl & Map.elems
223 ]
224 & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
225
226 data Pron = Pron
227 { pronInput :: [Lexeme]
228 , pronRule :: Rule
229 }
230 deriving (Eq, Show)
231
232 data Syl = Syl
233 { sylText :: Text
234 , sylDependsOnBefore :: Bool
235 , sylDependsOnAfter :: Bool
236 , sylDependsOnMeaning :: Bool
237 , sylSound :: Text -- [IPA.Syllable []]
238 , sylIndex :: Int
239 , sylSilent :: Bool
240 , sylSplit :: Bool
241 }
242 deriving (Eq, Show)
243
244 addIndexes :: [[Either Char Pron]] -> [[Syl]]
245 addIndexes = go 0
246 where
247 go _idx [] = []
248 go idx (prons : next) = List.reverse prons' : go idx' next
249 where
250 (idx', prons') =
251 prons
252 & List.foldl'
253 ( \(i, is) -> \case
254 Left c ->
255 ( i
256 , Syl
257 { sylText = Text.singleton c
258 , sylDependsOnAfter = False
259 , sylDependsOnBefore = False
260 , sylDependsOnMeaning = False
261 , sylSound = []
262 , sylIndex = i
263 , sylSilent = True
264 , sylSplit = False
265 }
266 : is
267 )
268 Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
269 ps
270 & List.foldl'
271 ( \(j, js) (t, Pronunciation{..}) ->
272 let sylText = t & unRuleLexemes & lexemesChars & Text.pack
273 in case pronunciationIPABroad of
274 []
275 | not (Text.null pronunciationText) ->
276 ( j + 1
277 , Syl
278 { sylText
279 , sylSound = pronunciationText
280 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
281 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
282 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
283 , sylIndex = j + 1
284 , sylSilent = False
285 , sylSplit = False
286 }
287 : js
288 )
289 [IPA.Syllable []]
290 | Text.null pronunciationText ->
291 ( j
292 , case js of
293 [] ->
294 Syl
295 { sylText
296 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
297 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
298 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
299 , sylSound = ""
300 , sylIndex = j
301 , sylSilent = True
302 , sylSplit = False
303 }
304 : js
305 j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
306 )
307 syls
308 | (syls & all IPA.isSilent) && Text.null pronunciationText ->
309 ( j
310 , Syl
311 { sylText
312 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
313 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
314 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
315 , sylSound = ""
316 , sylIndex = j
317 , sylSilent = True
318 , sylSplit = False
319 }
320 : js
321 )
322 _syls@[_] ->
323 ( j + 1
324 , Syl
325 { sylText
326 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
327 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
328 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
329 , sylSound = pronunciationText
330 , sylIndex = j + 1
331 , sylSilent = False
332 , sylSplit = False
333 }
334 : js
335 )
336 _syls@[_, _] ->
337 ( j
338 , Syl
339 { sylText
340 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
341 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
342 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
343 , sylSound = pronunciationText & Text.replace "." ""
344 , sylIndex = j
345 , sylSilent = False
346 , sylSplit = True
347 }
348 : js
349 )
350 syls -> errorShow syls
351 )
352 (i, is)
353 where
354 sylDependsOnAfter =
355 List.reverse >>> \case
356 LexemeBorder : _ -> True
357 LexemeSilent : _ -> True
358 LexemeConsonant : _ -> True
359 LexemeDoubleConsonant : _ -> True
360 LexemeVowel : _ -> True
361 LexemeSemiVowel : _ -> True
362 _ -> False
363 sylDependsOnBefore =
364 \case
365 LexemeBorder : _ -> True
366 LexemeSilent : _ -> True
367 LexemeConsonant : _ -> True
368 LexemeDoubleConsonant : _ -> True
369 LexemeVowel : _ -> True
370 LexemeSemiVowel : _ -> True
371 _ -> False
372 sylDependsOnMeaning =
373 List.reverse >>> \case
374 LexemeMeaning{} : _ -> True
375 _ -> False
376 )
377 (idx, [])
378
379 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
380 withCapital =
381 foldMap \(RuleLexemes pat, rul) ->
382 [ (RuleLexemes pat, rul)
383 ,
384 ( RuleLexemes (withCapitalLexemes pat)
385 , rul
386 { rulePron = rul & rulePron & withCapitalPronunciations
387 , ruleExamples =
388 rul
389 & ruleExamples
390 & Map.mapKeys
391 ( unInputLexemes
392 >>> withCapitalLexemes
393 >>> InputLexemes
394 )
395 }
396 )
397 ]
398 where
399 withCapitalPronunciations (Pronunciations []) = Pronunciations []
400 withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
401 Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
402 withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
403 withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
404 withCapitalLexemes [] = []
405
406 lexemesChars :: [Lexeme] -> [Char]
407 lexemesChars p =
408 p & foldMap \case
409 LexemeChar c -> [c]
410 _ -> []
411
412 run ::
413 Table ->
414 Text ->
415 Either
416 ( Either
417 (P.ParseErrorBundle Text ())
418 (P.ParseErrorBundle [Lexeme] ())
419 )
420 [Either Char Pron]
421 run rules inp =
422 inp
423 & runLexer
424 & either (Left . Left) \lexs ->
425 lexs
426 & runParser rules
427 & either (Left . Right) Right
428
429 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
430 runParser tbl inp = inp & P.runParser (parser tbl) "input"
431
432 parseLiterals ::
433 Table ->
434 [ExampleLiteral] ->
435 Either
436 ( Either
437 (P.ParseErrorBundle Text ())
438 (P.ParseErrorBundle [Lexeme] ())
439 )
440 [Either Char Pron]
441 parseLiterals rules inp =
442 inp
443 & traverse
444 ( \ExampleLiteral{..} ->
445 exampleLiteralText
446 & TextShort.toText
447 & runLexer
448 <&> ( <>
449 [ LexemeMeaning exampleLiteralMeaning
450 | exampleLiteralMeaning & TextShort.null & not
451 ]
452 )
453 )
454 & either (Left . Left) \lexs ->
455 lexs
456 & mconcat
457 & runParser rules
458 & either (Left . Right) Right
459
460 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
461 parser tbl = do
462 res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
463 P.eof
464 return $ res & catMaybes
465 where
466 -- Match one of the rules, trying longuest first
467 parseRules :: P.Parsec () [Lexeme] Pron
468 parseRules =
469 P.choice
470 [ parseRule r
471 | r <- tbl & Map.toDescList
472 ]
473 {-
474 <|> P.choice
475 [ parseRule
476 ( RuleLexemes $
477 rulePat & unRuleLexemes <&> \case
478 LexemeChar c -> LexemeChar (c & Char.toUpper)
479 x -> x
480 , curRule
481 )
482 | (rulePat, curRule) <- tbl & Map.toDescList
483 ]
484 -}
485 parseRule (rulePat, curRule@Rule{..}) =
486 P.try do
487 let
488 pat = rulePat & unRuleLexemes
489 patSep = \case
490 LexemeVowel -> True
491 LexemeSemiVowel -> True
492 LexemeConsonant -> True
493 LexemeSilent -> True
494 LexemeAhead{} -> True
495 _ -> False
496 -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
497 patBegin = pat & List.dropWhileEnd patSep
498 patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
499 patAhead =
500 pat & List.takeWhile \case
501 LexemeAhead{} -> True
502 _ -> False
503 -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
504 P.chunk patBegin
505 P.lookAhead do
506 P.chunk $
507 patAhead <&> \case
508 LexemeAhead c -> LexemeChar c
509 x -> x
510 inpAfterBegin <- P.getInput
511 unless (List.null patEnd) do
512 inpWithAhead <- parseAhead
513 -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
514 P.setInput inpWithAhead
515 P.chunk patEnd & void
516 -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
517 let lastSound =
518 rulePron
519 & unPronunciations
520 & List.reverse
521 & headMaybe
522 & maybe
523 []
524 ( snd
525 >>> pronunciationIPABroad
526 >>> List.reverse
527 >>> headMaybe
528 >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
529 )
530 P.setInput $ lastSound <> inpAfterBegin
531 return Pron{pronInput = pat, pronRule = curRule}
532 parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
533 parseChar =
534 P.anySingle <&> \case
535 LexemeChar c -> Just $ Left c
536 _ -> Nothing
537 parseAhead :: P.Parsec () [Lexeme] [Lexeme]
538 parseAhead = do
539 nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
540 -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
541 case nextStep of
542 Right (Right Pron{pronInput, pronRule}) -> do
543 let x =
544 pronRule
545 & rulePron
546 & unPronunciations
547 & headMaybe
548 & maybe
549 []
550 ( snd
551 >>> pronunciationIPABroad
552 >>> headMaybe
553 >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
554 )
555 inp <- P.getInput
556 return $ x <> pronInput <> inp
557 Right (Left lex) -> do
558 parseAhead <&> (lex :)
559 Left{} -> P.getInput
560 lexemeHeadSound :: [_] -> [Lexeme]
561 lexemeHeadSound =
562 headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
563 Just IPA.Zero{} -> [LexemeSilent]
564 Just IPA.Vowel{} -> [LexemeVowel]
565 Just (IPA.Consonant consonant) -> do
566 case consonant of
567 IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
568 IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
569 _ -> [LexemeConsonant]
570 _ -> [] -- error
571
572 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
573 runLexer inp = inp & P.runParser lexer "input"
574
575 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
576 exampleLiteralsLexemes ls =
577 ls & foldMap \ExampleLiteral{..} ->
578 unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
579 <> [ LexemeMeaning exampleLiteralMeaning
580 ]
581
582 lexer :: P.Parsec () Text [Lexeme]
583 lexer = do
584 lls <- P.many do
585 P.choice $
586 list
587 [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
588 return [LexemeChar c | c <- cs & Text.unpack]
589 , do
590 cs <- P.takeWhile1P Nothing Char.isLetter
591 mean <- (<|> return []) $ P.try do
592 P.single '{'
593 m <- P.takeWhile1P Nothing (/= '}')
594 P.single '}'
595 return [LexemeMeaning (TextShort.fromText m)]
596 return $
597 LexemeBorder
598 : [LexemeChar c | c <- cs & Text.unpack]
599 <> mean
600 <> [LexemeBorder]
601 , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
602 return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
603 , P.satisfy Char.isSymbol >>= \c ->
604 return [LexemeChar c]
605 , P.satisfy Char.isSeparator >>= \c ->
606 return [LexemeChar c]
607 , P.satisfy Char.isMark >>= \c ->
608 return [LexemeChar c]
609 , P.satisfy Char.isPunctuation >>= \c ->
610 return [LexemeChar c]
611 ]
612 P.eof
613 return $ mconcat lls
614
615 words :: [Either Char Pron] -> [[Either Char Pron]]
616 words [] = []
617 words prons = word0 : words next
618 where
619 (word0, rest) = prons & List.span (isSep >>> not)
620 (_sep, next) = rest & List.span isSep
621 isSep = \case
622 Left c | c & Char.isSpace -> True
623 _ -> False
624
625 {-
626 case statePats st & Map.lookup k of
627 Nothing -> st
628 Just (PatTree pats) ->
629 loop st {statePats = pats, stateBuffer = k : stateBuffer st}
630 Just (PatEnd end) ->
631 loop st { statePats = initPats
632 , stateBuffer = []
633 , stateInput = stateInput st
634 & LZ.insert Inp
635 { inpPats = stateBuffer st & List.reverse
636 , inpPronunciations = end
637 }
638 & LZ.right
639 }
640 -}
641
642 {-
643 parse :: PatTree -> Text -> [Inp]
644 parse initPats input =
645 let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
646 runInp [] initPats inpZip & LZ.toList
647 where
648 charToInp :: Char -> Inp
649 charToInp c = Inp
650 { inpPats = [PosNext (PatternChar c)]
651 , inpPronunciations = []
652 }
653 runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
654 runInp oks pat inp =
655 traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
656 case pat of
657 PatEnd end ->
658 -- the pattern ends
659 inp
660 & LZ.insert Inp
661 { inpPats = oks & List.reverse
662 , inpPronunciations = end
663 }
664 & LZ.right
665 & runInp [] initPats
666 PatTree pats ->
667 -- the pattern may go on
668 case inp & LZ.safeCursor of
669 Nothing ->
670 inp
671 & runPat [] oks [PosNext PatternLexicalBorder] pats
672 Just cur ->
673 inp & LZ.delete
674 & runPat [] oks (inpPats cur & List.sort) pats
675
676 runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
677 runPat kos oks todos pats inp =
678 traceShow ( "runPat"::Text
679 , ("kos"::Text) := kos
680 , ("oks"::Text) := oks
681 , ("todos"::Text) := todos
682 , ("cur"::Text) :=LZ.safeCursor inp
683 ) $
684 case todos of
685 [] | LZ.endp inp -> inp
686 & runInp kos (PatEnd [])
687 & runInp oks (PatEnd [])
688 [] ->
689 -- nothing left to advance the pattern
690 --traceShow ("runPat/[]"::Text) $
691 inp
692 & (if null kos then id else runInp kos (PatEnd []))
693 & runInp oks (PatTree pats)
694 k:ks ->
695 case pats & Map.lookup k of
696 -- the pattern ends
697 Just (PatEnd end) ->
698 --traceShow ("runPat/End"::Text) $
699 -- ks forgotten
700 inp
701 & (if null kos then id else runInp kos (PatEnd []))
702 & runInp (k:oks) (PatEnd end)
703 -- the pattern advances
704 Just (PatTree nextPats) ->
705 --traceShow ("runPat/Node"::Text) $
706 inp & runPat kos (k:oks) ks nextPats
707 -- the pattern does not advance
708 Nothing ->
709 inp & runPat (k:kos) oks ks pats
710 -}
711
712 data Lexeme
713 = LexemeBorder
714 | LexemeVowel
715 | LexemeSemiVowel
716 | LexemeConsonant
717 | LexemeDoubleConsonant
718 | LexemeSilent
719 | LexemeMeaning ShortText
720 | -- | `LexemeChar` is last to have priority when using `Map.toDescList`
721 LexemeChar Char
722 | -- | `LexemeAhead` is last to have priority when using `Map.toDescList`
723 LexemeAhead Char
724 deriving (Eq, Ord, Show)
725
726 -- data Sound
727 -- = SoundVowel
728 -- | SoundConsonant
729 -- deriving (Eq, Ord, Show)
730
731 {-
732 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
733 deriving (Eq, Ord, Show)
734 instance P.Stream Lexemes where
735 type Token Lexemes = Lexeme
736 type Tokens Lexemes = Lexemes
737 tokensToChunk _px = Lexemes
738 chunkToTokens _px = unLexemes
739 chunkLength _px = unLexemes >>> List.length
740 chunkEmpty _px = unLexemes >>> List.null
741 take1_ = unLexemes >>> P.take1_ >>> coerce
742 takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
743 takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
744
745 instance IsString Lexemes where
746 fromString s =
747 s
748 & Text.pack
749 & runLexer
750 & either
751 errorShow
752 ((`appEndo` []) >>> Lexemes)
753 -}
754
755 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
756 deriving (Eq, Ord, Show)
757 instance HasTypeDefault RuleLexemes where
758 typeDefault = RuleLexemes typeDefault
759 instance Semigroup RuleLexemes where
760 RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
761 instance Monoid RuleLexemes where
762 mempty = RuleLexemes mempty
763 instance IsList RuleLexemes where
764 type Item RuleLexemes = Lexeme
765 fromList = RuleLexemes
766 toList = unRuleLexemes
767 instance IsString RuleLexemes where
768 fromString s =
769 s
770 & Text.pack
771 & runLexer
772 & either
773 errorShow
774 ( List.dropWhileEnd (== LexemeBorder)
775 >>> List.dropWhile (== LexemeBorder)
776 >>> RuleLexemes
777 )
778
779 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
780 deriving (Eq, Ord, Show)
781 instance HasTypeDefault InputLexemes where
782 typeDefault = InputLexemes typeDefault
783 instance Semigroup InputLexemes where
784 InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
785 instance Monoid InputLexemes where
786 mempty = InputLexemes mempty
787 instance IsList InputLexemes where
788 type Item InputLexemes = Lexeme
789 fromList = InputLexemes
790 toList = unInputLexemes
791 instance IsString InputLexemes where
792 fromString s =
793 s
794 & Text.pack
795 & runLexer
796 & either errorShow InputLexemes
797
798 instance P.ShowErrorComponent () where
799 showErrorComponent = show
800 errorComponentLen _ = 2
801 instance P.VisualStream [Lexeme] where
802 showTokens _s = show
803 tokensLength _s xs = xs <&> (show >>> List.length) & sum
804 instance P.TraversableStream [Lexeme] where
805 reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
806
807 data LexemeTag
808 = LexemeTagLetter
809 | LexemeTagSpace
810 | LexemeTagPunctuation
811 | LexemeTagSeparator
812 | LexemeTagMark
813 | LexemeTagSymbol
814 | LexemeTagDefinition
815 | LexemeTagBorder
816 deriving (Eq, Ord, Show)
817 deriving instance Ord (IPA.Syllable [])
818 deriving instance Ord IPA.SuprasegmentalFeature
819 deriving instance Ord IPA.SegmentalFeature
820 deriving instance Ord IPA.Sibilance
821 deriving instance Ord IPA.Manner
822 deriving instance Ord IPA.Phonation
823 deriving instance Ord IPA.Roundedness
824 deriving instance Ord IPA.Height
825 deriving instance Ord IPA.Vowel
826 deriving instance Ord IPA.Consonant
827 deriving instance Ord IPA.Segment
828
829 {-
830 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
831 tableToMatch tbl = loop
832 where
833 loop prevBorder = \case
834 InputText inp ->
835 [ (matchingLength, )
836 | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
837 , let matchingLength = transMatchingLength input trans
838 , 0 < matchingLength || not (isTransConsume trans)
839 , let (inputRead, inputRest) = input & Text.splitAt matchingLength
840 ]
841 & Map.fromListWith (\new old -> old)
842 & Map.lookupMax
843 <&> snd
844
845 -}
846 tableHtml :: Table -> IO HTML.Html
847 tableHtml tbl = do
848 dataPath <- Self.getDataDir <&> File.normalise
849 let title :: String = "LexerDict"
850 let pageOrientation = Paper.PageOrientationPortrait
851 let pageSize = Paper.PageSizeA4
852 let partLangue = LangueFrançais
853 return do
854 HTML.docTypeHtml do
855 HTML.head do
856 HTML.title $ title & HTML.toHtml
857 forM_
858 ( [ "styles/Paper.css"
859 , "styles/French/Lexer.css"
860 , "styles/Rosetta/Reading.css"
861 ]
862 & list
863 )
864 \cssFile ->
865 HTML.link
866 ! HA.rel "stylesheet"
867 ! HA.type_ "text/css"
868 ! HA.href (dataPath </> cssFile & HTML.toValue)
869 HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
870 -- HTML.styleCSS $ pageDifficulties & difficultyCSS
871 HTML.body
872 ! classes ["A4", "french-lexer"]
873 $ do
874 "\n"
875 let rulesChunks = tbl & Map.toList & chunksOf 50
876 forM_ rulesChunks \rules ->
877 HTML.section
878 ! classes
879 [ "sheet"
880 ]
881 ! styles
882 []
883 $ do
884 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
885 "\n"
886 HTML.div
887 ! classes
888 [ "dict-entry"
889 , if even ruleIndex then "even" else "odd"
890 ]
891 ! styles
892 []
893 $ do
894 "\n"
895 HTML.div
896 ! classes
897 [ "dict-key"
898 , "lang-" <> className partLangue
899 ]
900 ! styles
901 []
902 -- "grid-template-columns" :=
903 -- (0.5 & cm & HTML.toCSS)
904 -- & List.replicate lexerDictMaxKeyLength
905 -- & List.unwords
906
907 $ do
908 forM_ (["model"] :: [String]) \rowKind -> do
909 forM_ (rulePat & unRuleLexemes) \ruleChar -> do
910 -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
911 case ruleChar of
912 LexemeChar c -> do
913 HTML.span
914 ! classes
915 [ "dict-key-cell"
916 , rowKind
917 -- , "script-" <> className uniScript
918 ]
919 $ do
920 c & HTML.toHtml
921 _ -> ""
922 HTML.div
923 ! classes
924 [ "dict-pronunciation"
925 ]
926 $ do
927 -- HTML.span ! classes ["arrow"] $ "→"
928 case rulePron of
929 Pronunciations
930 { unPronunciations =
931 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
932 } -> ""
933 Pronunciations{unPronunciations = is} ->
934 is
935 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
936 & HTML.toHtml
937 HTML.div
938 ! classes
939 [ "dict-lexeme"
940 ]
941 $ do
942 -- HTML.span ! classes ["arrow"] $ "→"
943 forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
944 HTML.span
945 ! classes
946 []
947 $ do
948 case pronunciationIPABroad of
949 [] -> pronunciationText & HTML.toHtml
950 _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
951 "; "