]> Git — Sourcephile - julm/worksheets.git/blob - src/Language/Pron.hs
wip
[julm/worksheets.git] / src / Language / Pron.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE StandaloneDeriving #-}
4
5 module Language.Pronunciation where
6
7 import Control.Applicative (asum)
8 import Data.List qualified as List
9 import Data.Map.Strict qualified as Map
10 import Data.Set qualified as Set
11 import Data.Text qualified as Text
12 import Data.Text.Lazy qualified as TextLazy
13 import Language
14 import Paths_worksheets qualified as Self
15 import System.FilePath.Posix ((</>))
16 import System.FilePath.Posix qualified as File
17 import Text.Blaze.Html5.Attributes qualified as HA
18 import Worksheets.Utils.Char qualified as Char
19 import Worksheets.Utils.HTML (className, classes, cm, styles, (!))
20 import Worksheets.Utils.HTML qualified as HTML
21 import Worksheets.Utils.IPA qualified as IPA
22 import Worksheets.Utils.Paper qualified as Paper
23 import Worksheets.Utils.Prelude
24 import Prelude (error)
25
26 -- import Data.Radix1Tree.Word8.Key.Unsafe qualified as RT
27 -- import Data.Radix1Tree.Word8.Strict qualified as RT
28
29 -- import Data.List.Split qualified as Split
30
31 -- radixFromList :: [(Text, a)] -> RT.Radix1Tree a
32 -- radixFromList = foldr (\(k, a) p -> RT.insert (RT.unsafeFeedText k) a p) RT.empty
33
34 data Pronunciation = PronunciationIPABroad
35 { pronunciationText :: Text
36 , pronunciationIPA :: IPA.Syllable []
37 }
38 deriving (Eq, Ord, Show)
39 instance IsString Pronunciation where
40 fromString = \case
41 "" -> PronunciationIPABroad "" $ IPA.Syllable []
42 s -> PronunciationIPABroad (s & Text.pack) $ fromString s
43
44 data Lexeme = Lexeme
45 { lexemeKey :: Text
46 , -- , lexemeNext :: Text
47 lexemePron :: Pronunciation
48 }
49 deriving (Eq, Show, Generic)
50
51 data LexemePron = LexemePron
52 { lexemePronunciation :: Pronunciation
53 , lexemeExample :: [Literal]
54 }
55 deriving (Eq, Show)
56
57 data Literal = Literal
58 { literalText :: ShortText
59 , literalTags :: Set LiteralTag
60 }
61 deriving (Eq, Ord, Show)
62 instance IsString Literal where
63 fromString s =
64 Literal
65 { literalText = s & fromString
66 , literalTags = Set.empty
67 }
68 data LiteralTag
69 = LiteralTagOccurence
70 | LiteralTagMeta
71 | LiteralTagSilent
72 deriving (Eq, Ord, Show)
73
74 hyphen =
75 Literal
76 { literalText = "-"
77 , literalTags = [LiteralTagMeta] & Set.fromList
78 }
79 occurence lit = lit{literalTags = lit & literalTags & Set.insert LiteralTagOccurence}
80 silent lit = lit{literalTags = lit & literalTags & Set.insert LiteralTagSilent}
81
82 type SyllableText = ShortText
83 type SyllableBroad = IPA.Syllable []
84 type SyllablesTable = Map SyllableText (Map SyllableText [([Literal], SyllableBroad)])
85
86 type ContextToLexemePron = (LexemePron, Map (IPA.Syllable []) LexemePron)
87 data Machine = Machine
88 { machinePron :: Maybe Pronunciation
89 , machineExample :: Set [Literal]
90 , machineAlts :: Map Trans Machine
91 -- ^ Those 'Machine's must not be recursive.
92 }
93 deriving (Eq, Show)
94 machine =
95 Machine
96 { machinePron = Nothing
97 , machineExample = Set.empty
98 , machineAlts = Map.empty
99 }
100
101 {-
102 instance Monoid Machine where
103 mempty = Machine
104 { machinePron = ""
105 , machineExample = []
106 , machineAlts = Map.empty
107 }
108 instance Semigroup Machine where
109 x <> y = Machine
110 { machinePron = if machinePron x == machinePron y
111 then machinePron x
112 else errorShow (machinePron x, machinePron y)
113 , machineExample = machineExample x <> machineExample y
114 , machineAlts = Map.unionWith (<>) (machineAlts x) (machineAlts y)
115 }
116 -}
117 machineExamples :: Machine -> Set [Literal]
118 machineExamples mach =
119 machineAlts mach
120 & foldMap machineExamples
121 & Set.union (machineExample mach)
122
123 data Input
124 = InputChar
125 | InputBorder
126 | Input
127
128 data Chunk = Chunk
129 { chunkInterval :: [Trans]
130 , chunkInputNext :: [Char]
131 , chunkMachine :: Machine
132 }
133 deriving (Eq, Show, Generic)
134
135 chunkText :: Chunk -> Text
136 chunkText Chunk{chunkInterval} =
137 chunkInterval
138 & List.foldl'
139 ( \acc -> \case
140 TransConsume (ObservChar c) -> c : acc
141 _ -> acc
142 )
143 []
144 & Text.pack
145
146 chunkPronunciation :: Chunk -> Text
147 chunkPronunciation Chunk{chunkMachine} =
148 chunkMachine
149 & machinePron
150 & maybe "" pronunciationText
151
152 chunksWords :: [Chunk] -> [[Chunk]]
153 chunksWords [] = []
154 chunksWords chks = word0 : chunksWords next
155 where
156 (word0, rest) = chks & List.span (isSep >>> not)
157 (_sep, next) = rest & List.span isSep
158 isSep chk = chk & chunkInterval & List.elem (TransConsume ObservSpace)
159
160 inputToObservs :: Text -> [Observ]
161 inputToObservs t = ObservBorder : Text.foldr (\c acc -> ObservChar c : acc) [ObservBorder] t
162
163 data State
164 = StateBorder
165 | StateNonBorder
166
167 transChunk :: Trans -> Chunk -> Maybe Chunk
168 transChunk obs chunk = do
169 nextMachine <- chunk & chunkMachine & machineAlts & Map.lookup trans
170 Chunk
171 { chunkMachine = nextMachine
172 , chunkInterval = trans : chunkInterval chunk
173 , chunkInputNext = chunkInputNext chunk
174 }
175
176 borderChunk chunk =
177 chunk
178 & transChunk (TransLookAhead ObservBorder)
179
180 readChunk :: Bool -> Chunk -> Maybe Chunk
181 readChunk isBorder chunk =
182 case chunk & chunkInputNext of
183 [] | not isBorder -> chunk & borderChunk >>= readChunk False
184 | otherwise -> Just chunk
185 i:is ->
186 | not isBorder && or [Char.isSpace i, ]
187
188 {-
189 consumeChar :: State -> Chunk -> Maybe Chunk
190 consumeChar st chunk = Nothing
191 | st == StateBorder = (chunk, [])
192 , otherwise = chunk & advanceChunk ObservBorder
193 consumeChar (c:cs) chunk
194 | st == StateBorder =
195 Char
196 case ob of
197 ObservBorder
198 | Just nextMach <- jumps & Map.lookup (TransLookAhead ObservBorder) ->
199 Just Chunk
200 { chunkInputNext = obs
201 , chunkMachine = nextMach
202 , chunkInterval = TransLookAhead ObservBorder : chunkInterval chunk
203 }
204 ObservChar c
205 | Just nextMach <- jumps & Map.lookup (ObservChar c) ->
206 Just Chunk
207 { chunkInputNext = obs
208 , chunkMachine = nextMach
209 , chunkInterval = TransConsume ob : chunkInterval chunk
210 }
211 | Char.isSpace c
212 , Just nextMach <- jumps & Map.lookup ObservSpace ->
213 Just Chunk
214 { chunkInputNext = obs
215 , chunkMachine = nextMach
216 , chunkInterval = TransConsume ob : chunkInterval chunk
217 }
218 | Char.isPunctuation c
219 , Just nextMach <- jumps & Map.lookup ObservPunctuation ->
220 Just Chunk
221 { chunkInputNext = obs
222 , chunkMachine = nextMach
223 , chunkInterval = TransConsume ob : chunkInterval chunk
224 }
225 where
226 jumps = chunk & chunkMachine & machineAlts
227 piorities :: [Observ]
228 piorities=
229 TransConsume ob :
230 TransLookAhead ob :
231 case ob of
232 ObservChar c ->
233 [ TransConsume o | o <- obsClasses ]
234 [ TransLookAhead o | o <- obsClasses ]
235 where
236 obsClasses =
237 mconcat
238 [ [ObservSpace | Char.isSpace c]
239 , [ObservPunctuation | Char.isPunctuation c]
240 , [ObservMark | Char.isMark c]
241 , [ObservSymbol | Char.isSymbol c]
242 , [ObservSeparator | Char.isSeparator c]
243 ]
244 -}
245
246 runMachine :: Machine -> Text -> Either ([Trans], Text) [Chunk]
247 runMachine machInit = inputToObservs >>> loop True [] initChunk
248 where
249 initChunk inp =
250 Chunk
251 { chunkInterval = []
252 , chunkInputNext = inp
253 , chunkMachine = machInit
254 }
255 loop :: Bool -> [Chunk] -> Chunk -> [Observ] -> Either ([Trans], Text) [Chunk]
256 loop isInit prevChunks chunk currentInput =
257 -- traceShow ("runMachine.loop" :: Text, isInit, chunk{chunkMachine = Machine "" [] []}, currentInput) $
258 if null currentInput
259 then Right [chunk & chunkUpdateBehind prevChunks & chunkUpdateAhead []]
260 else
261 case chunk & updateChunkWithInput currentInput of
262 Just (newChunk, inputNext) -> loop False prevChunks newChunk inputNext
263 Nothing
264 | isInit -> Left (chunk & chunkInterval, currentInput & Text.take 10)
265 | otherwise -> do
266 -- Reset the machine to process the next chunk
267 nextChunks <- loop True (chunk : prevChunks) (initChunk currentInput) currentInput
268 Right $ (chunk & chunkUpdateBehind prevChunks & chunkUpdateAhead nextChunks) : nextChunks
269
270 updateChunkWithInput :: Text -> Chunk -> Maybe (Chunk, Text)
271 updateChunkWithInput input chunk =
272 -- traceShow ("updateChunkWithInput" :: Text, chunk{chunkMachine = Machine "" [] []}) $
273 [ (matchingLength, (newChunk, inputRest))
274 | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
275 , let matchingLength = transMatchingLength input trans
276 , 0 < matchingLength || not (isTransConsume trans)
277 , -- , transMach & machinePron & isJust
278 let (inputRead, inputRest) = input & Text.splitAt matchingLength
279 , let newChunk =
280 Chunk
281 { -- chunkText = chunkText chunk <> inputRead
282 chunkMachine = transMach
283 , chunkInterval = trans : chunkInterval chunk
284 }
285 ]
286 & Map.fromListWith (\new old -> old)
287 & Map.lookupMax
288 <&> snd
289 where
290 transMatchingLength :: Text -> Trans -> Int
291 transMatchingLength inp = \case
292 TransConsume obs ->
293 case obs of
294 ObservChar prefix
295 | Text.isPrefixOf prefix inp -> Text.length prefix
296 | Text.isPrefixOf prefix (inp & Text.take (Text.length prefix) & Text.toLower) -> Text.length prefix
297 | otherwise -> 0
298 ObservSpace
299 | Just (c0, _) <- Text.uncons inp
300 , Char.isSpace c0 ->
301 1
302 | otherwise -> 0
303 ObservPunctuation
304 | Just (c0, _) <- Text.uncons inp
305 , Char.isPunctuation c0 ->
306 1
307 | otherwise -> 0
308 ObservMark
309 | Just (c0, _) <- Text.uncons inp
310 , Char.isMark c0 ->
311 1
312 | otherwise -> 0
313 ObservNumber
314 | Just (c0, _) <- Text.uncons inp
315 , Char.isNumber c0 ->
316 1
317 | otherwise -> 0
318 ObservSeparator
319 | Just (c0, _) <- Text.uncons inp
320 , Char.isSeparator c0 ->
321 1
322 | otherwise -> 0
323 ObservSymbol
324 | Just (c0, _) <- Text.uncons inp
325 , Char.isSymbol c0 ->
326 1
327 | otherwise -> 0
328 _ -> 0
329 _ -> 0
330
331 -- transNextOrId :: Trans -> Machine -> Machine
332 -- transNextOrId trans mach = mach & machineAlts & Map.findWithDefault mach trans
333 filterTransConsume = List.filter (\case TransConsume{} -> True; _ -> False)
334 isTransConsume = \case
335 TransConsume{} -> True
336 _ -> False
337 applyTrans :: Trans -> Chunk -> Chunk
338 applyTrans trans chunk =
339 case chunk & chunkMachine & machineAlts & Map.lookup trans of
340 Nothing -> chunk
341 Just nextMachine ->
342 -- \| Nothing <- nextMachine & machinePron -> chunk
343 -- \| otherwise ->
344 chunk
345 { chunkMachine = nextMachine
346 , chunkInterval = trans : chunkInterval chunk
347 -- , chunkText = chunkText chunk
348 }
349 applyTransSeq :: [Trans] -> Chunk -> Chunk
350 applyTransSeq [] chunk = chunk
351 applyTransSeq (t : ts) chunk = chunk & applyTrans t & applyTransSeq ts
352
353 chunkUpdateAhead :: [Chunk] -> Chunk -> Chunk
354 chunkUpdateAhead aheadChunks chunk =
355 -- traceShow ("chunkUpdateAhead"::Text, chunk & chunkText, aheadObservs) $
356 chunk & applyTransSeq [TransLookAhead x | x <- mconcat aheadObservs]
357 where
358 aheadObservs :: [[Observ]]
359 aheadObservs =
360 [ -- specific observation
361 case aheadChunks of
362 Chunk{chunkInterval = List.reverse . filterTransConsume -> TransConsume obs : _} : _ -> [obs]
363 _ -> []
364 , -- border observation
365 case aheadChunks of
366 Chunk{chunkInterval = List.reverse . filterTransConsume -> TransConsume obs : _} : _ ->
367 case obs of
368 ObservSpace -> [ObservBorder]
369 ObservNumber -> [ObservBorder]
370 ObservMark -> [ObservBorder]
371 ObservPunctuation -> [ObservBorder]
372 _ -> []
373 [] -> [ObservBorder]
374 _ -> []
375 , -- vowel/semivowel/consonant observation
376 case aheadChunks of
377 Chunk{chunkMachine = Machine{machinePron = Just PronunciationIPABroad{pronunciationIPA = IPA.syllableToSegments -> seg : _}}} : _ ->
378 case seg of
379 IPA.Vowel{} -> [ObservVowel]
380 IPA.Consonant (IPA.Pulmonic _phonation _place IPA.Approximant) -> [ObservSemiVowel]
381 IPA.Consonant (IPA.Ejective _place IPA.Approximant) -> [ObservSemiVowel]
382 _ -> [ObservConsonant]
383 _ -> []
384 ]
385
386 chunkUpdateBehind :: [Chunk] -> Chunk -> Chunk
387 chunkUpdateBehind behindChunks chunk =
388 -- traceShow ("chunkUpdateBehind"::Text, chunk & chunkText, behindObservs, behindChunks <&> chunkText) $
389 chunk & applyTransSeq [TransLookBehind x | x <- mconcat behindObservs]
390 where
391 behindObservs :: [[Observ]]
392 behindObservs =
393 [ -- specific observation
394 case behindChunks of
395 Chunk{chunkInterval = filterTransConsume -> TransConsume obs : _} : _ -> [obs]
396 _ -> []
397 , -- border observation
398 case behindChunks of
399 Chunk{chunkInterval = filterTransConsume -> TransConsume obs : _} : _ ->
400 case obs of
401 ObservSpace -> [ObservBorder]
402 ObservNumber -> [ObservBorder]
403 ObservMark -> [ObservBorder]
404 ObservPunctuation -> [ObservBorder]
405 _ -> []
406 [] -> [ObservBorder]
407 _ -> []
408 , -- vowel/semivowel/consonant observation
409 case behindChunks of
410 Chunk{chunkMachine = Machine{machinePron = Just PronunciationIPABroad{pronunciationIPA = IPA.syllableToSegments >>> List.reverse -> seg : _}}} : _ ->
411 case seg of
412 IPA.Vowel{} -> [ObservVowel]
413 IPA.Consonant (IPA.Pulmonic _phonation _place IPA.Approximant) -> [ObservSemiVowel]
414 IPA.Consonant (IPA.Ejective _place IPA.Approximant) -> [ObservSemiVowel]
415 _ -> [ObservConsonant]
416 _ -> []
417 ]
418
419 -- planète
420 data Trans
421 = TransConsume Observ
422 | TransLookAhead Observ
423 | TransLookBehind Observ
424 | TransDefinition Text
425 deriving (Eq, Ord, Show)
426 data Observ
427 = ObservChar Char
428 | ObservConsonant
429 | ObservSemiVowel
430 | ObservVowel
431 | ObservLetter
432 | ObservMark
433 | ObservNumber
434 | ObservPunctuation
435 | ObservSeparator
436 | ObservSpace
437 | ObservSymbol
438 | ObservBorder
439 deriving (Eq, Ord, Show)
440 instance IsString [Trans] where
441 fromString = fmap (TransConsume . ObservChar)
442
443 data LexerDict = LexerDict
444 { lexerDictMap :: Map Text (Map PronContext LexemePron)
445 , lexerDictMaxKeyLength :: Int
446 }
447
448 {-
449 contextToLexemePron :: [(Text, ContextToLexemePron)] -> [(Text, LexemePron)]
450 contextToLexemePron =
451 foldr
452 (
453 \(key, (defLex, nextTolex))
454 acc@((nextKey,nextLex):as) ->
455 Map.findWithDefault defLex nextLex nextToLex:acc
456 ) []
457 -}
458
459 borderLeftChar = '⌟'
460 borderRightChar = '⌞'
461 borderLeftText = borderLeftChar & Text.singleton
462 borderRightText = borderRightChar & Text.singleton
463 borderInnerText = borderRightText <> borderLeftText
464
465 {-
466 mapLookupLonguest :: Text -> LexerDict -> Maybe Lexeme
467 mapLookupLonguest inp LexerDict{..} =
468 [ lexerDictMap & Map.lookup (key & Text.toLower)
469 & maybeToList
470 & foldMap \nextToVariant ->
471 let nextMax :: Int =
472 nextToVariant & Map.keys
473 <&> Text.length
474 & Set.fromList
475 & Set.lookupMax
476 & fromMaybe (error "empty map")
477 in
478 [ nextToVariant & Map.lookup (next & Text.toLower)
479 <&> (\variant -> Lexeme{lexemeKey=key, lexemeNext=next, lexemePron=variant & lexemePronunciation})
480 & maybeToList
481 | next <- inp & Text.drop (key & Text.length)
482 & Text.take nextMax
483 & Text.inits
484 & List.reverse
485 ] & mconcat
486 | key <- inp & Text.take lexerDictMaxKeyLength
487 & Text.inits
488 & List.reverse
489 , not $ Text.null key
490 ]
491 & mconcat
492 & headMaybe
493 -}
494
495 lexerInit :: Text -> Text
496 lexerInit input =
497 borderLeftText <> innerInput <> borderRightText
498 where
499 innerInput = input & Text.replace " " borderInnerText
500
501 type LexerError = (Text, [Text])
502
503 data PronContext
504 = PronContextStressed
505 | PronContextUnstressed
506 | PronContextBeforeBorder
507 | PronContextBeforeAnyVowel
508 | -- | Aka. semi-vowels
509 PronContextBeforeAnySemiVowel
510 | PronContextBeforeAnyConsonant
511 | PronContextBeforeAny
512 | PronContextBeforeSegment IPA.Segment
513 deriving (Eq, Ord, Show)
514 deriving instance Ord (IPA.Syllable [])
515 deriving instance Ord IPA.SuprasegmentalFeature
516 deriving instance Ord IPA.SegmentalFeature
517 deriving instance Ord IPA.Sibilance
518 deriving instance Ord IPA.Manner
519 deriving instance Ord IPA.Phonation
520 deriving instance Ord IPA.Roundedness
521 deriving instance Ord IPA.Height
522 deriving instance Ord IPA.Vowel
523 deriving instance Ord IPA.Consonant
524 deriving instance Ord IPA.Segment
525
526 lexer :: LexerDict -> Text -> Either LexerError [Lexeme]
527 lexer dict input = input & lexerInit & splitChunks <&> foldChunks
528 where
529 nextChunk :: Text -> Maybe (Text, Map PronContext LexemePron)
530 nextChunk inp =
531 [ dict & lexerDictMap & Map.lookup (key & Text.toLower) <&> (key,) & maybeToList
532 | key <-
533 inp
534 & Text.take (dict & lexerDictMaxKeyLength)
535 & Text.inits
536 & List.reverse
537 , not $ Text.null key
538 ]
539 & mconcat
540 & headMaybe
541 splitChunks :: Text -> Either LexerError [(Text, Map PronContext LexemePron)]
542 splitChunks inp =
543 inp
544 & nextChunk
545 & \case
546 Nothing -> Left (inp, keys)
547 where
548 keys =
549 inp
550 & Text.take (dict & lexerDictMaxKeyLength)
551 & Text.inits
552 & List.reverse
553 Just kv@(key, _contextToVariant)
554 | otherwise ->
555 if Text.null inpNext
556 then Right [kv]
557 else case splitChunks inpNext of
558 Left (rest, keys) -> Left (rest, keys)
559 Right res' -> Right (kv : res')
560 where
561 inpNext = inp & Text.drop (Text.length key)
562 dropZeros :: [Lexeme] -> [Lexeme]
563 dropZeros = List.dropWhile \case
564 Lexeme{lexemePron = PronunciationIPABroad{pronunciationIPA = IPA.Syllable [IPA.Zero{}]}} -> True
565 _ -> False
566 foldChunks :: [(Text, Map PronContext LexemePron)] -> [Lexeme]
567 foldChunks =
568 foldr
569 ( \(chunk, contextToVariant) afters ->
570 (: afters) $
571 fromMaybe (Lexeme chunk $ PronunciationIPABroad "" (IPA.Syllable [IPA.Zero])) $
572 asum $
573 list
574 [ case afters of
575 [] -> Nothing
576 Lexeme{lexemeKey} : _ ->
577 case lexemeKey & Text.unpack of
578 c : _cs
579 | c & Char.isAlphaNum & not ->
580 contextToVariant
581 & Map.lookup PronContextBeforeBorder
582 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
583 _ -> Nothing
584 , case afters & dropZeros of
585 [] -> Nothing
586 Lexeme{lexemePron = PronunciationIPABroad{pronunciationIPA = pron}} : _ ->
587 case pron & IPA.syllableToSegments of
588 seg : _
589 | Just LexemePron{lexemePronunciation} <-
590 contextToVariant & Map.lookup (PronContextBeforeSegment seg) ->
591 Just $ Lexeme chunk lexemePronunciation
592 IPA.Vowel{} : _ ->
593 contextToVariant
594 & Map.lookup PronContextBeforeAnyVowel
595 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
596 IPA.Consonant consonant : _ ->
597 case consonant of
598 IPA.Pulmonic _phonation _place IPA.Approximant ->
599 contextToVariant
600 & Map.lookup PronContextBeforeAnySemiVowel
601 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
602 IPA.Ejective _place IPA.Approximant ->
603 contextToVariant
604 & Map.lookup PronContextBeforeAnySemiVowel
605 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
606 _ ->
607 contextToVariant
608 & Map.lookup PronContextBeforeAnyConsonant
609 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
610 _ -> Nothing
611 , contextToVariant
612 & Map.lookup PronContextBeforeAny
613 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
614 ]
615 )
616 []
617
618 lexerDict lexerDictMap =
619 LexerDict
620 { lexerDictMap
621 , lexerDictMaxKeyLength =
622 lexerDictMap
623 & Map.keys
624 <&> Text.length
625 & Set.fromList
626 & Set.lookupMax
627 & fromMaybe (error "empty map")
628 }
629
630 lexerPron :: LexerDict -> Text -> Either LexerError [(Text, Text)]
631 lexerPron dict inp =
632 inp
633 & lexer dict
634 <&> foldMap
635 ( \Lexeme{..} ->
636 lexemePron & \case
637 PronunciationIPABroad txt (IPA.Syllable [IPA.Zero]) -> [(lexemeKey, txt)]
638 PronunciationIPABroad txt _ipa -> [(lexemeKey, txt)]
639 )
640
641 data FrenchToken = FrenchToken
642 { tokenText :: Text
643 , tokenSound :: Text
644 }
645
646 lexerChunks :: LexerDict -> Text -> Either LexerError [FrenchToken]
647 lexerChunks dict inp =
648 inp
649 & lexer dict
650 <&> foldMap
651 ( \Lexeme{..} ->
652 lexemePron & \case
653 PronunciationIPABroad{pronunciationIPA = IPA.Syllable [IPA.Zero]} -> [FrenchToken lexemeKey ""]
654 PronunciationIPABroad{pronunciationText} -> [FrenchToken lexemeKey pronunciationText]
655 )
656
657 groupLexemes :: [Lexeme] -> [[Lexeme]]
658 groupLexemes ls =
659 ls -- & Split.split ( Split.whenElt (\(t, _) -> t == "\n") & Split.dropDelims)
660 & group
661 where
662 group :: [Lexeme] -> [[Lexeme]]
663 group [] = []
664 -- group (inpHead@("\n", lex) : inpTail) =
665 group (inpHead : inpTail) =
666 if inpHead & lexemeKey & Text.isPrefixOf borderLeftText
667 then
668 ( let (seps, rest) = inpTail & List.span \l -> not $ l & lexemeKey & Text.isPrefixOf borderLeftText
669 in (inpHead : seps) : group rest
670 )
671 else error "groupLexemes"
672
673 pronunciation :: LexerDict -> [Either Lexeme Text] -> [[Lexeme]]
674 pronunciation dict ts =
675 [ case lexOrText of
676 Left lex -> [lex]
677 Right txt ->
678 txt
679 & lexer dict
680 & either (error . TextLazy.unpack . pShow) id
681 -- & List.intercalate [("\n", LexemePron{lexemePronunciation=PronunciationSilent, lexemeExample=[]})]
682 where
683
684 | -- lines = txt & Text.split (== '\n')
685 lexOrText <- ts
686 ]
687 & mconcat
688 & groupLexemes
689
690 lexerWords dict =
691 [ (word, word & lexerPron dict)
692 | word <-
693 [ "poule"
694 ]
695 ]
696
697 -- 1|de,ʁə,ʁe,ʁa
698 {-
699 syllablesTable :: SyllablesTable
700 syllablesTable =
701 [ "b" :=
702 [ "a" := ["ba" := "ba"]
703 , "e" := ["ber" := "bɛʁ", "be" := "bə", "be" := "bɛ"]
704 , "é" := ["bé" := "be"]
705 ]
706 ]
707 -}
708 -- 1|de,ʁə,ʁe,ʁa
709 -- 2|ʁa,li,a,ti,ʁɛ,zə,ɑ̃
710 -- 3|ɑ̃,ni,ʁi,ka,e,ze,ʁɔ̃,ta,te,si,za
711 -- 4|za,to,ma,na,di,mi,kɔ̃,kɔ,o,sjɔ̃,tə,la,pa,ne
712 -- 5|ne,i,fi,se,bi,də,ɔ,ɛ̃,sje,ʁje,me,nə,ʁjɔ̃,mɑ̃,le,tɔ,lɔ,pi,ba,vi,zɑ̃
713
714 syllablesTableToHTML :: SyllablesTable -> IO HTML.Html
715 syllablesTableToHTML sylInitToSylFinalToPhon = do
716 -- FIXME: this absolute path is not portable out of my system
717 dataPath <- Self.getDataDir <&> File.normalise
718 return do
719 -- let pageOrientation = Paper.PageOrientationLandscape
720 -- let pageSize = Paper.PageSizeA4
721 HTML.docTypeHtml do
722 HTML.head do
723 HTML.title $ "Syllabes"
724 forM_
725 ( [ "Paper.css"
726 -- , "SyllableTable.css"
727 ]
728 & list
729 )
730 \cssFile ->
731 HTML.link
732 ! HA.rel "stylesheet"
733 ! HA.type_ "text/css"
734 ! HA.href (dataPath </> "styles" </> cssFile & HTML.toValue)
735 -- HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
736 let sylInits = sylInitToSylFinalToPhon & Map.keysSet
737 let sylFinals = sylInitToSylFinalToPhon & Map.elems & foldMap Map.keysSet
738 HTML.body ! classes ["A4"] $ do
739 "\n"
740 forM_ ([1 :: Int .. 3] & list) \page ->
741 HTML.section
742 ! classes ["sheet"]
743 $ do
744 HTML.article do
745 "test" <> show page & HTML.toHtml
746
747 {-
748 HTML.div
749 ! classes ["main-page"]
750 $ do
751 HTML.div
752 ! classes
753 [ "syllable-table"
754 , "sub-page"
755 , "page-" <> className pageSize <> "-" <> className pageOrientation
756 ]
757 ! styles
758 [ "grid-template-columns" := (1 & fr & HTML.toCSS) & List.replicate (1 + Set.size sylFinals) & List.unwords
759 ]
760 $ do
761 forM_ sylFinals \sylFinal -> do
762 HTML.div do
763 sylFinal & HTML.toHtml
764 forM_ sylFinals \sylFinal -> do
765 forM_ (sylInitToSylFinalToPhon & Map.toList) \(sylInit, sylFinalToPhon) -> do
766 HTML.div do
767 sylInit & HTML.toHtml
768 forM_ sylFinals \sylFinal -> do
769 HTML.div do
770 forM_ (sylFinalToPhon & Map.lookup sylFinal & fromMaybe []) \phons -> do
771 phons & show & HTML.toHtml
772 -}
773
774 data PronDictKey = PronDictKey Text
775 deriving (Eq)
776 instance Ord PronDictKey where
777 PronDictKey x `compare` PronDictKey y =
778 compare (Down $ Text.length x) (Down $ Text.length y)
779 <> compare x y
780
781
782 ful, pre, suf, inf :: Text -> [Text]
783 ful t =
784 [ borderLeftText <> t <> borderRightText
785 , borderLeftText <> t <> "'"
786 , "'" <> t <> borderRightText
787 , borderLeftText <> t <> "-"
788 , "-" <> t <> borderRightText
789 ]
790 pre t = [borderLeftText <> t, "-" <> t, "'" <> t]
791 suf t = [t <> borderRightText, t <> "-", t <> "'", t <> ",", t <> "."]
792 inf t = [t]