src/Language/Pron.hs

   1 {-# LANGUAGE OverloadedLists #-}
   2 {-# LANGUAGE OverloadedStrings #-}
   3 {-# LANGUAGE StandaloneDeriving #-}
   4
   5 module Language.Pronunciation where
   6
   7 import Control.Applicative (asum)
   8 import Data.List qualified as List
   9 import Data.Map.Strict qualified as Map
  10 import Data.Set qualified as Set
  11 import Data.Text qualified as Text
  12 import Data.Text.Lazy qualified as TextLazy
  13 import Language
  14 import Paths_worksheets qualified as Self
  15 import System.FilePath.Posix ((</>))
  16 import System.FilePath.Posix qualified as File
  17 import Text.Blaze.Html5.Attributes qualified as HA
  18 import Worksheets.Utils.Char qualified as Char
  19 import Worksheets.Utils.HTML (className, classes, cm, styles, (!))
  20 import Worksheets.Utils.HTML qualified as HTML
  21 import Worksheets.Utils.IPA qualified as IPA
  22 import Worksheets.Utils.Paper qualified as Paper
  23 import Worksheets.Utils.Prelude
  24 import Prelude (error)
  25
  26 -- import Data.Radix1Tree.Word8.Key.Unsafe qualified as RT
  27 -- import Data.Radix1Tree.Word8.Strict qualified as RT
  28
  29 -- import Data.List.Split qualified as Split
  30
  31 -- radixFromList :: [(Text, a)] -> RT.Radix1Tree a
  32 -- radixFromList = foldr (\(k, a) p -> RT.insert (RT.unsafeFeedText k) a p) RT.empty
  33
  34 data Pronunciation = PronunciationIPABroad
  35   { pronunciationText :: Text
  36   , pronunciationIPA :: IPA.Syllable []
  37   }
  38   deriving (Eq, Ord, Show)
  39 instance IsString Pronunciation where
  40   fromString = \case
  41     "" -> PronunciationIPABroad "" $ IPA.Syllable []
  42     s -> PronunciationIPABroad (s & Text.pack) $ fromString s
  43
  44 data Lexeme = Lexeme
  45   { lexemeKey :: Text
  46   , -- , lexemeNext :: Text
  47     lexemePron :: Pronunciation
  48   }
  49   deriving (Eq, Show, Generic)
  50
  51 data LexemePron = LexemePron
  52   { lexemePronunciation :: Pronunciation
  53   , lexemeExample :: [Literal]
  54   }
  55   deriving (Eq, Show)
  56
  57 data Literal = Literal
  58   { literalText :: ShortText
  59   , literalTags :: Set LiteralTag
  60   }
  61   deriving (Eq, Ord, Show)
  62 instance IsString Literal where
  63   fromString s =
  64     Literal
  65       { literalText = s & fromString
  66       , literalTags = Set.empty
  67       }
  68 data LiteralTag
  69   = LiteralTagOccurence
  70   | LiteralTagMeta
  71   | LiteralTagSilent
  72   deriving (Eq, Ord, Show)
  73
  74 hyphen =
  75   Literal
  76     { literalText = "-"
  77     , literalTags = [LiteralTagMeta] & Set.fromList
  78     }
  79 occurence lit = lit{literalTags = lit & literalTags & Set.insert LiteralTagOccurence}
  80 silent lit = lit{literalTags = lit & literalTags & Set.insert LiteralTagSilent}
  81
  82 type SyllableText = ShortText
  83 type SyllableBroad = IPA.Syllable []
  84 type SyllablesTable = Map SyllableText (Map SyllableText [([Literal], SyllableBroad)])
  85
  86 type ContextToLexemePron = (LexemePron, Map (IPA.Syllable []) LexemePron)
  87 data Machine = Machine
  88   { machinePron :: Maybe Pronunciation
  89   , machineExample :: Set [Literal]
  90   , machineAlts :: Map Trans Machine
  91   -- ^ Those 'Machine's must not be recursive.
  92   }
  93   deriving (Eq, Show)
  94 machine =
  95   Machine
  96     { machinePron = Nothing
  97     , machineExample = Set.empty
  98     , machineAlts = Map.empty
  99     }
 100
 101 {-
 102 instance Monoid Machine where
 103   mempty = Machine
 104     { machinePron = ""
 105     , machineExample = []
 106     , machineAlts = Map.empty
 107     }
 108 instance Semigroup Machine where
 109   x <> y = Machine
 110     { machinePron = if machinePron x == machinePron y
 111                     then machinePron x
 112                     else errorShow (machinePron x, machinePron y)
 113     , machineExample = machineExample x <> machineExample y
 114     , machineAlts = Map.unionWith (<>) (machineAlts x) (machineAlts y)
 115     }
 116 -}
 117 machineExamples :: Machine -> Set [Literal]
 118 machineExamples mach =
 119   machineAlts mach
 120     & foldMap machineExamples
 121     & Set.union (machineExample mach)
 122
 123 data Input
 124   = InputChar
 125   | InputBorder
 126   | Input
 127
 128 data Chunk = Chunk
 129   { chunkInterval :: [Trans]
 130   , chunkInputNext :: [Char]
 131   , chunkMachine :: Machine
 132   }
 133   deriving (Eq, Show, Generic)
 134
 135 chunkText :: Chunk -> Text
 136 chunkText Chunk{chunkInterval} =
 137   chunkInterval
 138     & List.foldl'
 139       ( \acc -> \case
 140           TransConsume (ObservChar c) -> c : acc
 141           _ -> acc
 142       )
 143       []
 144     & Text.pack
 145
 146 chunkPronunciation :: Chunk -> Text
 147 chunkPronunciation Chunk{chunkMachine} =
 148   chunkMachine
 149     & machinePron
 150     & maybe "" pronunciationText
 151
 152 chunksWords :: [Chunk] -> [[Chunk]]
 153 chunksWords [] = []
 154 chunksWords chks = word0 : chunksWords next
 155   where
 156     (word0, rest) = chks & List.span (isSep >>> not)
 157     (_sep, next) = rest & List.span isSep
 158     isSep chk = chk & chunkInterval & List.elem (TransConsume ObservSpace)
 159
 160 inputToObservs :: Text -> [Observ]
 161 inputToObservs t = ObservBorder : Text.foldr (\c acc -> ObservChar c : acc) [ObservBorder] t
 162
 163 data State
 164   = StateBorder
 165   | StateNonBorder
 166
 167 transChunk :: Trans -> Chunk -> Maybe Chunk
 168 transChunk obs chunk = do
 169   nextMachine <- chunk & chunkMachine & machineAlts & Map.lookup trans
 170   Chunk
 171     { chunkMachine = nextMachine
 172     , chunkInterval = trans : chunkInterval chunk
 173     , chunkInputNext = chunkInputNext chunk
 174     }
 175
 176 borderChunk chunk =
 177   chunk
 178     & transChunk (TransLookAhead ObservBorder)
 179
 180 readChunk :: Bool -> Chunk -> Maybe Chunk
 181 readChunk isBorder chunk =
 182   case chunk & chunkInputNext of
 183     [] | not isBorder -> chunk & borderChunk >>= readChunk False
 184        | otherwise -> Just chunk
 185     i:is ->
 186       | not isBorder && or [Char.isSpace i, ]
 187
 188 {-
 189 consumeChar :: State -> Chunk -> Maybe Chunk
 190 consumeChar st chunk = Nothing
 191   | st == StateBorder = (chunk, [])
 192   , otherwise = chunk & advanceChunk ObservBorder
 193 consumeChar (c:cs) chunk
 194   | st == StateBorder =
 195   Char
 196   case ob of
 197     ObservBorder
 198       | Just nextMach <- jumps & Map.lookup (TransLookAhead ObservBorder) ->
 199           Just Chunk
 200             { chunkInputNext = obs
 201             , chunkMachine = nextMach
 202             , chunkInterval = TransLookAhead ObservBorder : chunkInterval chunk
 203             }
 204     ObservChar c
 205       | Just nextMach <- jumps & Map.lookup (ObservChar c) ->
 206           Just Chunk
 207             { chunkInputNext = obs
 208             , chunkMachine = nextMach
 209             , chunkInterval = TransConsume ob : chunkInterval chunk
 210             }
 211       | Char.isSpace c
 212       , Just nextMach <- jumps & Map.lookup ObservSpace ->
 213           Just Chunk
 214             { chunkInputNext = obs
 215             , chunkMachine = nextMach
 216             , chunkInterval = TransConsume ob : chunkInterval chunk
 217             }
 218       | Char.isPunctuation c
 219       , Just nextMach <- jumps & Map.lookup ObservPunctuation ->
 220           Just Chunk
 221             { chunkInputNext = obs
 222             , chunkMachine = nextMach
 223             , chunkInterval = TransConsume ob : chunkInterval chunk
 224             }
 225   where
 226   jumps = chunk & chunkMachine & machineAlts
 227   piorities :: [Observ]
 228   piorities=
 229     TransConsume ob :
 230     TransLookAhead ob :
 231     case ob of
 232       ObservChar c ->
 233         [ TransConsume o | o <- obsClasses ]
 234         [ TransLookAhead o | o <- obsClasses ]
 235         where
 236         obsClasses =
 237           mconcat
 238             [ [ObservSpace | Char.isSpace c]
 239             , [ObservPunctuation | Char.isPunctuation c]
 240             , [ObservMark | Char.isMark c]
 241             , [ObservSymbol | Char.isSymbol c]
 242             , [ObservSeparator | Char.isSeparator c]
 243             ]
 244 -}
 245
 246 runMachine :: Machine -> Text -> Either ([Trans], Text) [Chunk]
 247 runMachine machInit = inputToObservs >>> loop True [] initChunk
 248   where
 249     initChunk inp =
 250       Chunk
 251         { chunkInterval = []
 252         , chunkInputNext = inp
 253         , chunkMachine = machInit
 254         }
 255     loop :: Bool -> [Chunk] -> Chunk -> [Observ] -> Either ([Trans], Text) [Chunk]
 256     loop isInit prevChunks chunk currentInput =
 257       -- traceShow ("runMachine.loop" :: Text, isInit, chunk{chunkMachine = Machine "" [] []}, currentInput) $
 258       if null currentInput
 259         then Right [chunk & chunkUpdateBehind prevChunks & chunkUpdateAhead []]
 260         else
 261           case chunk & updateChunkWithInput currentInput of
 262             Just (newChunk, inputNext) -> loop False prevChunks newChunk inputNext
 263             Nothing
 264               | isInit -> Left (chunk & chunkInterval, currentInput & Text.take 10)
 265               | otherwise -> do
 266                   -- Reset the machine to process the next chunk
 267                   nextChunks <- loop True (chunk : prevChunks) (initChunk currentInput) currentInput
 268                   Right $ (chunk & chunkUpdateBehind prevChunks & chunkUpdateAhead nextChunks) : nextChunks
 269
 270 updateChunkWithInput :: Text -> Chunk -> Maybe (Chunk, Text)
 271 updateChunkWithInput input chunk =
 272   -- traceShow ("updateChunkWithInput" :: Text, chunk{chunkMachine = Machine "" [] []}) $
 273   [ (matchingLength, (newChunk, inputRest))
 274   | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
 275   , let matchingLength = transMatchingLength input trans
 276   , 0 < matchingLength || not (isTransConsume trans)
 277   , -- , transMach & machinePron & isJust
 278   let (inputRead, inputRest) = input & Text.splitAt matchingLength
 279   , let newChunk =
 280           Chunk
 281             { -- chunkText = chunkText chunk <> inputRead
 282               chunkMachine = transMach
 283             , chunkInterval = trans : chunkInterval chunk
 284             }
 285   ]
 286     & Map.fromListWith (\new old -> old)
 287     & Map.lookupMax
 288     <&> snd
 289   where
 290     transMatchingLength :: Text -> Trans -> Int
 291     transMatchingLength inp = \case
 292       TransConsume obs ->
 293         case obs of
 294           ObservChar prefix
 295             | Text.isPrefixOf prefix inp -> Text.length prefix
 296             | Text.isPrefixOf prefix (inp & Text.take (Text.length prefix) & Text.toLower) -> Text.length prefix
 297             | otherwise -> 0
 298           ObservSpace
 299             | Just (c0, _) <- Text.uncons inp
 300             , Char.isSpace c0 ->
 301                 1
 302             | otherwise -> 0
 303           ObservPunctuation
 304             | Just (c0, _) <- Text.uncons inp
 305             , Char.isPunctuation c0 ->
 306                 1
 307             | otherwise -> 0
 308           ObservMark
 309             | Just (c0, _) <- Text.uncons inp
 310             , Char.isMark c0 ->
 311                 1
 312             | otherwise -> 0
 313           ObservNumber
 314             | Just (c0, _) <- Text.uncons inp
 315             , Char.isNumber c0 ->
 316                 1
 317             | otherwise -> 0
 318           ObservSeparator
 319             | Just (c0, _) <- Text.uncons inp
 320             , Char.isSeparator c0 ->
 321                 1
 322             | otherwise -> 0
 323           ObservSymbol
 324             | Just (c0, _) <- Text.uncons inp
 325             , Char.isSymbol c0 ->
 326                 1
 327             | otherwise -> 0
 328           _ -> 0
 329       _ -> 0
 330
 331 -- transNextOrId :: Trans -> Machine -> Machine
 332 -- transNextOrId trans mach = mach & machineAlts & Map.findWithDefault mach trans
 333 filterTransConsume = List.filter (\case TransConsume{} -> True; _ -> False)
 334 isTransConsume = \case
 335   TransConsume{} -> True
 336   _ -> False
 337 applyTrans :: Trans -> Chunk -> Chunk
 338 applyTrans trans chunk =
 339   case chunk & chunkMachine & machineAlts & Map.lookup trans of
 340     Nothing -> chunk
 341     Just nextMachine ->
 342       -- \| Nothing <- nextMachine & machinePron -> chunk
 343       -- \| otherwise ->
 344       chunk
 345         { chunkMachine = nextMachine
 346         , chunkInterval = trans : chunkInterval chunk
 347         -- , chunkText = chunkText chunk
 348         }
 349 applyTransSeq :: [Trans] -> Chunk -> Chunk
 350 applyTransSeq [] chunk = chunk
 351 applyTransSeq (t : ts) chunk = chunk & applyTrans t & applyTransSeq ts
 352
 353 chunkUpdateAhead :: [Chunk] -> Chunk -> Chunk
 354 chunkUpdateAhead aheadChunks chunk =
 355   -- traceShow ("chunkUpdateAhead"::Text, chunk & chunkText, aheadObservs) $
 356   chunk & applyTransSeq [TransLookAhead x | x <- mconcat aheadObservs]
 357   where
 358     aheadObservs :: [[Observ]]
 359     aheadObservs =
 360       [ -- specific observation
 361         case aheadChunks of
 362           Chunk{chunkInterval = List.reverse . filterTransConsume -> TransConsume obs : _} : _ -> [obs]
 363           _ -> []
 364       , -- border observation
 365         case aheadChunks of
 366           Chunk{chunkInterval = List.reverse . filterTransConsume -> TransConsume obs : _} : _ ->
 367             case obs of
 368               ObservSpace -> [ObservBorder]
 369               ObservNumber -> [ObservBorder]
 370               ObservMark -> [ObservBorder]
 371               ObservPunctuation -> [ObservBorder]
 372               _ -> []
 373           [] -> [ObservBorder]
 374           _ -> []
 375       , -- vowel/semivowel/consonant observation
 376         case aheadChunks of
 377           Chunk{chunkMachine = Machine{machinePron = Just PronunciationIPABroad{pronunciationIPA = IPA.syllableToSegments -> seg : _}}} : _ ->
 378             case seg of
 379               IPA.Vowel{} -> [ObservVowel]
 380               IPA.Consonant (IPA.Pulmonic _phonation _place IPA.Approximant) -> [ObservSemiVowel]
 381               IPA.Consonant (IPA.Ejective _place IPA.Approximant) -> [ObservSemiVowel]
 382               _ -> [ObservConsonant]
 383           _ -> []
 384       ]
 385
 386 chunkUpdateBehind :: [Chunk] -> Chunk -> Chunk
 387 chunkUpdateBehind behindChunks chunk =
 388   -- traceShow ("chunkUpdateBehind"::Text, chunk & chunkText, behindObservs, behindChunks <&> chunkText) $
 389   chunk & applyTransSeq [TransLookBehind x | x <- mconcat behindObservs]
 390   where
 391     behindObservs :: [[Observ]]
 392     behindObservs =
 393       [ -- specific observation
 394         case behindChunks of
 395           Chunk{chunkInterval = filterTransConsume -> TransConsume obs : _} : _ -> [obs]
 396           _ -> []
 397       , -- border observation
 398         case behindChunks of
 399           Chunk{chunkInterval = filterTransConsume -> TransConsume obs : _} : _ ->
 400             case obs of
 401               ObservSpace -> [ObservBorder]
 402               ObservNumber -> [ObservBorder]
 403               ObservMark -> [ObservBorder]
 404               ObservPunctuation -> [ObservBorder]
 405               _ -> []
 406           [] -> [ObservBorder]
 407           _ -> []
 408       , -- vowel/semivowel/consonant observation
 409         case behindChunks of
 410           Chunk{chunkMachine = Machine{machinePron = Just PronunciationIPABroad{pronunciationIPA = IPA.syllableToSegments >>> List.reverse -> seg : _}}} : _ ->
 411             case seg of
 412               IPA.Vowel{} -> [ObservVowel]
 413               IPA.Consonant (IPA.Pulmonic _phonation _place IPA.Approximant) -> [ObservSemiVowel]
 414               IPA.Consonant (IPA.Ejective _place IPA.Approximant) -> [ObservSemiVowel]
 415               _ -> [ObservConsonant]
 416           _ -> []
 417       ]
 418
 419 -- planète
 420 data Trans
 421   = TransConsume Observ
 422   | TransLookAhead Observ
 423   | TransLookBehind Observ
 424   | TransDefinition Text
 425   deriving (Eq, Ord, Show)
 426 data Observ
 427   = ObservChar Char
 428   | ObservConsonant
 429   | ObservSemiVowel
 430   | ObservVowel
 431   | ObservLetter
 432   | ObservMark
 433   | ObservNumber
 434   | ObservPunctuation
 435   | ObservSeparator
 436   | ObservSpace
 437   | ObservSymbol
 438   | ObservBorder
 439   deriving (Eq, Ord, Show)
 440 instance IsString [Trans] where
 441   fromString = fmap (TransConsume . ObservChar)
 442
 443 data LexerDict = LexerDict
 444   { lexerDictMap :: Map Text (Map PronContext LexemePron)
 445   , lexerDictMaxKeyLength :: Int
 446   }
 447
 448 {-
 449 contextToLexemePron :: [(Text, ContextToLexemePron)] -> [(Text, LexemePron)]
 450 contextToLexemePron =
 451   foldr
 452     (
 453       \(key, (defLex, nextTolex))
 454       acc@((nextKey,nextLex):as) ->
 455       Map.findWithDefault defLex nextLex nextToLex:acc
 456     ) []
 457 -}
 458
 459 borderLeftChar = '⌟'
 460 borderRightChar = '⌞'
 461 borderLeftText = borderLeftChar & Text.singleton
 462 borderRightText = borderRightChar & Text.singleton
 463 borderInnerText = borderRightText <> borderLeftText
 464
 465 {-
 466 mapLookupLonguest :: Text -> LexerDict -> Maybe Lexeme
 467 mapLookupLonguest inp LexerDict{..} =
 468   [ lexerDictMap & Map.lookup (key & Text.toLower)
 469                  & maybeToList
 470                  & foldMap \nextToVariant ->
 471       let nextMax :: Int =
 472             nextToVariant & Map.keys
 473                           <&> Text.length
 474                           & Set.fromList
 475                           & Set.lookupMax
 476                           & fromMaybe (error "empty map")
 477             in
 478         [ nextToVariant & Map.lookup (next & Text.toLower)
 479                         <&> (\variant -> Lexeme{lexemeKey=key, lexemeNext=next, lexemePron=variant & lexemePronunciation})
 480                         & maybeToList
 481         | next <- inp & Text.drop (key & Text.length)
 482                       & Text.take nextMax
 483                       & Text.inits
 484                       & List.reverse
 485         ] & mconcat
 486   | key <- inp & Text.take lexerDictMaxKeyLength
 487                & Text.inits
 488                & List.reverse
 489   , not $ Text.null key
 490   ]
 491     & mconcat
 492     & headMaybe
 493 -}
 494
 495 lexerInit :: Text -> Text
 496 lexerInit input =
 497   borderLeftText <> innerInput <> borderRightText
 498   where
 499     innerInput = input & Text.replace " " borderInnerText
 500
 501 type LexerError = (Text, [Text])
 502
 503 data PronContext
 504   = PronContextStressed
 505   | PronContextUnstressed
 506   | PronContextBeforeBorder
 507   | PronContextBeforeAnyVowel
 508   | -- | Aka. semi-vowels
 509     PronContextBeforeAnySemiVowel
 510   | PronContextBeforeAnyConsonant
 511   | PronContextBeforeAny
 512   | PronContextBeforeSegment IPA.Segment
 513   deriving (Eq, Ord, Show)
 514 deriving instance Ord (IPA.Syllable [])
 515 deriving instance Ord IPA.SuprasegmentalFeature
 516 deriving instance Ord IPA.SegmentalFeature
 517 deriving instance Ord IPA.Sibilance
 518 deriving instance Ord IPA.Manner
 519 deriving instance Ord IPA.Phonation
 520 deriving instance Ord IPA.Roundedness
 521 deriving instance Ord IPA.Height
 522 deriving instance Ord IPA.Vowel
 523 deriving instance Ord IPA.Consonant
 524 deriving instance Ord IPA.Segment
 525
 526 lexer :: LexerDict -> Text -> Either LexerError [Lexeme]
 527 lexer dict input = input & lexerInit & splitChunks <&> foldChunks
 528   where
 529     nextChunk :: Text -> Maybe (Text, Map PronContext LexemePron)
 530     nextChunk inp =
 531       [ dict & lexerDictMap & Map.lookup (key & Text.toLower) <&> (key,) & maybeToList
 532       | key <-
 533           inp
 534             & Text.take (dict & lexerDictMaxKeyLength)
 535             & Text.inits
 536             & List.reverse
 537       , not $ Text.null key
 538       ]
 539         & mconcat
 540         & headMaybe
 541     splitChunks :: Text -> Either LexerError [(Text, Map PronContext LexemePron)]
 542     splitChunks inp =
 543       inp
 544         & nextChunk
 545         & \case
 546           Nothing -> Left (inp, keys)
 547             where
 548               keys =
 549                 inp
 550                   & Text.take (dict & lexerDictMaxKeyLength)
 551                   & Text.inits
 552                   & List.reverse
 553           Just kv@(key, _contextToVariant)
 554             | otherwise ->
 555                 if Text.null inpNext
 556                   then Right [kv]
 557                   else case splitChunks inpNext of
 558                     Left (rest, keys) -> Left (rest, keys)
 559                     Right res' -> Right (kv : res')
 560             where
 561               inpNext = inp & Text.drop (Text.length key)
 562     dropZeros :: [Lexeme] -> [Lexeme]
 563     dropZeros = List.dropWhile \case
 564       Lexeme{lexemePron = PronunciationIPABroad{pronunciationIPA = IPA.Syllable [IPA.Zero{}]}} -> True
 565       _ -> False
 566     foldChunks :: [(Text, Map PronContext LexemePron)] -> [Lexeme]
 567     foldChunks =
 568       foldr
 569         ( \(chunk, contextToVariant) afters ->
 570             (: afters) $
 571               fromMaybe (Lexeme chunk $ PronunciationIPABroad "" (IPA.Syllable [IPA.Zero])) $
 572                 asum $
 573                   list
 574                     [ case afters of
 575                         [] -> Nothing
 576                         Lexeme{lexemeKey} : _ ->
 577                           case lexemeKey & Text.unpack of
 578                             c : _cs
 579                               | c & Char.isAlphaNum & not ->
 580                                   contextToVariant
 581                                     & Map.lookup PronContextBeforeBorder
 582                                     <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
 583                             _ -> Nothing
 584                     , case afters & dropZeros of
 585                         [] -> Nothing
 586                         Lexeme{lexemePron = PronunciationIPABroad{pronunciationIPA = pron}} : _ ->
 587                           case pron & IPA.syllableToSegments of
 588                             seg : _
 589                               | Just LexemePron{lexemePronunciation} <-
 590                                   contextToVariant & Map.lookup (PronContextBeforeSegment seg) ->
 591                                   Just $ Lexeme chunk lexemePronunciation
 592                             IPA.Vowel{} : _ ->
 593                               contextToVariant
 594                                 & Map.lookup PronContextBeforeAnyVowel
 595                                 <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
 596                             IPA.Consonant consonant : _ ->
 597                               case consonant of
 598                                 IPA.Pulmonic _phonation _place IPA.Approximant ->
 599                                   contextToVariant
 600                                     & Map.lookup PronContextBeforeAnySemiVowel
 601                                     <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
 602                                 IPA.Ejective _place IPA.Approximant ->
 603                                   contextToVariant
 604                                     & Map.lookup PronContextBeforeAnySemiVowel
 605                                     <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
 606                                 _ ->
 607                                   contextToVariant
 608                                     & Map.lookup PronContextBeforeAnyConsonant
 609                                     <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
 610                             _ -> Nothing
 611                     , contextToVariant
 612                         & Map.lookup PronContextBeforeAny
 613                         <&> \LexemePron{lexemePronunciation} -> Lexeme chunk lexemePronunciation
 614                     ]
 615         )
 616         []
 617
 618 lexerDict lexerDictMap =
 619   LexerDict
 620     { lexerDictMap
 621     , lexerDictMaxKeyLength =
 622         lexerDictMap
 623           & Map.keys
 624           <&> Text.length
 625           & Set.fromList
 626           & Set.lookupMax
 627           & fromMaybe (error "empty map")
 628     }
 629
 630 lexerPron :: LexerDict -> Text -> Either LexerError [(Text, Text)]
 631 lexerPron dict inp =
 632   inp
 633     & lexer dict
 634     <&> foldMap
 635       ( \Lexeme{..} ->
 636           lexemePron & \case
 637             PronunciationIPABroad txt (IPA.Syllable [IPA.Zero]) -> [(lexemeKey, txt)]
 638             PronunciationIPABroad txt _ipa -> [(lexemeKey, txt)]
 639       )
 640
 641 data FrenchToken = FrenchToken
 642   { tokenText :: Text
 643   , tokenSound :: Text
 644   }
 645
 646 lexerChunks :: LexerDict -> Text -> Either LexerError [FrenchToken]
 647 lexerChunks dict inp =
 648   inp
 649     & lexer dict
 650     <&> foldMap
 651       ( \Lexeme{..} ->
 652           lexemePron & \case
 653             PronunciationIPABroad{pronunciationIPA = IPA.Syllable [IPA.Zero]} -> [FrenchToken lexemeKey ""]
 654             PronunciationIPABroad{pronunciationText} -> [FrenchToken lexemeKey pronunciationText]
 655       )
 656
 657 groupLexemes :: [Lexeme] -> [[Lexeme]]
 658 groupLexemes ls =
 659   ls -- & Split.split ( Split.whenElt (\(t, _) -> t == "\n") & Split.dropDelims)
 660     & group
 661   where
 662     group :: [Lexeme] -> [[Lexeme]]
 663     group [] = []
 664     -- group (inpHead@("\n", lex) : inpTail) =
 665     group (inpHead : inpTail) =
 666       if inpHead & lexemeKey & Text.isPrefixOf borderLeftText
 667         then
 668           ( let (seps, rest) = inpTail & List.span \l -> not $ l & lexemeKey & Text.isPrefixOf borderLeftText
 669             in (inpHead : seps) : group rest
 670           )
 671         else error "groupLexemes"
 672
 673 pronunciation :: LexerDict -> [Either Lexeme Text] -> [[Lexeme]]
 674 pronunciation dict ts =
 675   [ case lexOrText of
 676     Left lex -> [lex]
 677     Right txt ->
 678       txt
 679         & lexer dict
 680         & either (error . TextLazy.unpack . pShow) id
 681         -- & List.intercalate [("\n", LexemePron{lexemePronunciation=PronunciationSilent, lexemeExample=[]})]
 682       where
 683
 684   | -- lines = txt & Text.split (== '\n')
 685   lexOrText <- ts
 686   ]
 687     & mconcat
 688     & groupLexemes
 689
 690 lexerWords dict =
 691   [ (word, word & lexerPron dict)
 692   | word <-
 693       [ "poule"
 694       ]
 695   ]
 696
 697 -- 1|de,ʁə,ʁe,ʁa
 698 {-
 699 syllablesTable :: SyllablesTable
 700 syllablesTable =
 701   [ "b" :=
 702       [ "a" := ["ba" := "ba"]
 703       , "e" := ["ber" := "bɛʁ", "be" := "bə", "be" := "bɛ"]
 704       , "é" := ["bé" := "be"]
 705       ]
 706   ]
 707 -}
 708 -- 1|de,ʁə,ʁe,ʁa
 709 -- 2|ʁa,li,a,ti,ʁɛ,zə,ɑ̃
 710 -- 3|ɑ̃,ni,ʁi,ka,e,ze,ʁɔ̃,ta,te,si,za
 711 -- 4|za,to,ma,na,di,mi,kɔ̃,kɔ,o,sjɔ̃,tə,la,pa,ne
 712 -- 5|ne,i,fi,se,bi,də,ɔ,ɛ̃,sje,ʁje,me,nə,ʁjɔ̃,mɑ̃,le,tɔ,lɔ,pi,ba,vi,zɑ̃
 713
 714 syllablesTableToHTML :: SyllablesTable -> IO HTML.Html
 715 syllablesTableToHTML sylInitToSylFinalToPhon = do
 716   -- FIXME: this absolute path is not portable out of my system
 717   dataPath <- Self.getDataDir <&> File.normalise
 718   return do
 719     -- let pageOrientation = Paper.PageOrientationLandscape
 720     -- let pageSize = Paper.PageSizeA4
 721     HTML.docTypeHtml do
 722       HTML.head do
 723         HTML.title $ "Syllabes"
 724         forM_
 725           ( [ "Paper.css"
 726           -- , "SyllableTable.css"
 727             ]
 728               & list
 729           )
 730           \cssFile ->
 731             HTML.link
 732               ! HA.rel "stylesheet"
 733               ! HA.type_ "text/css"
 734               ! HA.href (dataPath </> "styles" </> cssFile & HTML.toValue)
 735       -- HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
 736       let sylInits = sylInitToSylFinalToPhon & Map.keysSet
 737       let sylFinals = sylInitToSylFinalToPhon & Map.elems & foldMap Map.keysSet
 738       HTML.body ! classes ["A4"] $ do
 739         "\n"
 740         forM_ ([1 :: Int .. 3] & list) \page ->
 741           HTML.section
 742             ! classes ["sheet"]
 743             $ do
 744               HTML.article do
 745                 "test" <> show page & HTML.toHtml
 746
 747 {-
 748 HTML.div
 749   ! classes ["main-page"]
 750   $ do
 751     HTML.div
 752       ! classes
 753         [ "syllable-table"
 754         , "sub-page"
 755         , "page-" <> className pageSize <> "-" <> className pageOrientation
 756         ]
 757       ! styles
 758         [ "grid-template-columns" := (1 & fr & HTML.toCSS) & List.replicate (1 + Set.size sylFinals) & List.unwords
 759         ]
 760       $ do
 761         forM_ sylFinals \sylFinal -> do
 762           HTML.div do
 763             sylFinal & HTML.toHtml
 764         forM_ sylFinals \sylFinal -> do
 765           forM_ (sylInitToSylFinalToPhon & Map.toList) \(sylInit, sylFinalToPhon) -> do
 766             HTML.div do
 767               sylInit & HTML.toHtml
 768               forM_ sylFinals \sylFinal -> do
 769                 HTML.div do
 770                   forM_ (sylFinalToPhon & Map.lookup sylFinal & fromMaybe []) \phons -> do
 771                     phons & show & HTML.toHtml
 772 -}
 773
 774 data PronDictKey = PronDictKey Text
 775   deriving (Eq)
 776 instance Ord PronDictKey where
 777   PronDictKey x `compare` PronDictKey y =
 778     compare (Down $ Text.length x) (Down $ Text.length y)
 779       <> compare x y
 780
 781
 782 ful, pre, suf, inf :: Text -> [Text]
 783 ful t =
 784   [ borderLeftText <> t <> borderRightText
 785   , borderLeftText <> t <> "'"
 786   , "'" <> t <> borderRightText
 787   , borderLeftText <> t <> "-"
 788   , "-" <> t <> borderRightText
 789   ]
 790 pre t = [borderLeftText <> t, "-" <> t, "'" <> t]
 791 suf t = [t <> borderRightText, t <> "-", t <> "'", t <> ",", t <> "."]
 792 inf t = [t]