src/Language/Pronunciation.hs

   1 {-# LANGUAGE OverloadedLists #-}
   2 {-# LANGUAGE OverloadedStrings #-}
   3 {-# LANGUAGE RankNTypes #-}
   4 {-# LANGUAGE StandaloneDeriving #-}
   5 {-# OPTIONS_GHC -Wno-orphans #-}
   6
   7 module Language.Pronunciation where
   8
   9 import Control.Applicative (Alternative (..))
  10 import Control.Monad.Combinators qualified as P
  11 import Control.Monad.Trans.Class qualified as MT
  12 import Control.Monad.Trans.State qualified as MT
  13 import Data.List qualified as List
  14 import Data.List.Zipper qualified as LZ
  15 import Data.Map.Strict qualified as Map
  16 import Data.Set qualified as Set
  17 import Data.Text qualified as Text
  18 import Data.Text.Short qualified as TextShort
  19 import Data.Traversable (traverse)
  20 import Language
  21 import Paths_worksheets qualified as Self
  22 import System.FilePath.Posix ((</>))
  23 import System.FilePath.Posix qualified as File
  24 import Text.Blaze.Html5.Attributes qualified as HA
  25 import Text.Megaparsec qualified as P
  26 import Worksheets.Utils.Char qualified as Char
  27 import Worksheets.Utils.HTML (className, classes, styles, (!))
  28 import Worksheets.Utils.HTML qualified as HTML
  29 import Worksheets.Utils.IPA qualified as IPA
  30 import Worksheets.Utils.Paper qualified as Paper
  31 import Worksheets.Utils.Prelude
  32
  33 data Pronunciation = Pronunciation
  34   { pronunciationIPABroad :: [IPA.Syllable []]
  35   , pronunciationText :: Text
  36   }
  37   deriving (Eq, Ord, Show)
  38 instance Semigroup Pronunciation where
  39   x <> y =
  40     Pronunciation
  41       { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
  42       , pronunciationText =
  43           [pronunciationText x, pronunciationText y]
  44             & List.filter (/= "")
  45             & Text.intercalate "."
  46       }
  47 instance IsList Pronunciation where
  48   type Item Pronunciation = IPA.Syllable []
  49   toList = pronunciationIPABroad
  50
  51   -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
  52   fromList l =
  53     Pronunciation
  54       { pronunciationIPABroad = ipa
  55       , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
  56       }
  57     where
  58       ipa =
  59         l
  60           & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
  61           & fromList
  62 instance IsString Pronunciation where
  63   fromString s =
  64     Pronunciation
  65       { pronunciationIPABroad = ipa
  66       , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
  67       }
  68     where
  69       ipa =
  70         s
  71           & Text.pack
  72           & IPA.parseSyllables @[]
  73           & either errorShow id
  74
  75 {-
  76 data PronunciationKey = PronunciationKey
  77   { pronunciationKeyText :: Text
  78   , pronunciationKeyPron :: Pronunciation
  79   , pronunciationKey
  80   }
  81 -}
  82
  83 newtype Pronunciations = Pronunciations
  84   { unPronunciations :: [(RuleLexemes, Pronunciation)]
  85   }
  86   deriving (Eq, Ord)
  87   deriving newtype (Show)
  88   deriving newtype (Semigroup)
  89   deriving newtype (Monoid)
  90 joinPronunciations :: Pronunciations -> Pronunciations
  91 joinPronunciations (Pronunciations ps) =
  92   Pronunciations
  93     [ input :=
  94         Pronunciation
  95           { pronunciationIPABroad = ipa
  96           , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
  97           }
  98     ]
  99   where
 100     input :: RuleLexemes
 101     ipa :: [IPA.Syllable []]
 102     (input, ipa) =
 103       ps
 104         & List.foldr
 105           ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
 106               case pronunciationIPABroad of
 107                 [] -> (inp <> suffix, IPA.Syllable [], acc)
 108                 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
 109                 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
 110                 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
 111                 _ -> errorShow pronunciationIPABroad
 112           )
 113           ( ""
 114           , IPA.Syllable []
 115           , []
 116           )
 117         & ( \(i, l, acc) ->
 118               ( i
 119               , glueSyllableToTheRight l acc
 120                   & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
 121               )
 122           )
 123     glueSyllableToTheRight ::
 124       IPA.Syllable [] ->
 125       [IPA.Syllable []] ->
 126       [IPA.Syllable []]
 127     glueSyllableToTheRight x y =
 128       case y of
 129         [] -> [x]
 130         [yL] -> [x <> yL]
 131         yL : yR -> x <> yL : yR
 132 instance IsList Pronunciations where
 133   type Item Pronunciations = (RuleLexemes, Pronunciation)
 134   toList = unPronunciations
 135   fromList l = Pronunciations{unPronunciations = l & fromList}
 136
 137 {-
 138 instance IsString Pronunciations where
 139   fromString = \case
 140     "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
 141     s -> Pronunciations (s & Text.pack) $
 142           fromString s
 143             & IPA.parseSyllables @[]
 144             & either errorShow id
 145 -}
 146 data ExampleLiteral = ExampleLiteral
 147   { exampleLiteralText :: ShortText
 148   , exampleLiteralTags :: Set LiteralTag
 149   , exampleLiteralMeaning :: ShortText
 150   }
 151   deriving (Eq, Ord, Show)
 152 instance IsString ExampleLiteral where
 153   fromString s =
 154     ExampleLiteral
 155       { exampleLiteralText = s & fromString
 156       , exampleLiteralTags = Set.empty
 157       , exampleLiteralMeaning = ""
 158       }
 159 data LiteralTag
 160   = LiteralTagOccurence
 161   | LiteralTagMeta
 162   | LiteralTagSilent
 163   deriving (Eq, Ord, Show)
 164 exampleLiteralsText :: [ExampleLiteral] -> ShortText
 165 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
 166
 167 hyphen =
 168   ExampleLiteral
 169     { exampleLiteralText = "-"
 170     , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
 171     , exampleLiteralMeaning = ""
 172     }
 173 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
 174 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
 175
 176 type SyllableText = ShortText
 177 type SyllableBroad = IPA.Syllable []
 178 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
 179
 180 data Adjacent
 181   = AdjacentBorder
 182   | AdjacentVowel
 183   | AdjacentConsonant
 184   deriving (Eq, Ord, Show)
 185 data Variant
 186   = VariantDefinition Text
 187   | VariantStress
 188   deriving (Eq, Ord, Show)
 189 data Rule = Rule
 190   { -- , ruleStress :: Bool
 191     -- , ruleDefinition :: Maybe Text
 192
 193     -- [ "e"    := ["ɛ"] , "x"    := ["g","z"] , "er"   := ["ɛʁ"] , "cice" := ["sis"] ]
 194     -- [ "exercice" := ["ɛg.zɛʁ.sis"]
 195     -- ]
 196     rulePron :: Pronunciations
 197   , ruleExamples :: Map InputLexemes Pronunciation
 198   }
 199   deriving (Eq, Ord, Show)
 200
 201 rule =
 202   Rule
 203     { rulePron = Pronunciations{unPronunciations = []}
 204     , ruleExamples = mempty
 205     }
 206 space = rule
 207 word
 208   , begining
 209   , ending ::
 210     RuleLexemes -> RuleLexemes
 211 word = begining >>> ending
 212 begining = after [LexemeBorder]
 213 ending = before [LexemeBorder]
 214 before ls r = RuleLexemes (unRuleLexemes r <> ls)
 215 after ls r = RuleLexemes (ls <> unRuleLexemes r)
 216 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
 217
 218 type Table = Map RuleLexemes Rule
 219 examples :: Table -> Map InputLexemes Pronunciation
 220 examples tbl =
 221   [ v & ruleExamples
 222   | v <- tbl & Map.elems
 223   ]
 224     & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
 225
 226 data Pron = Pron
 227   { pronInput :: [Lexeme]
 228   , pronRule :: Rule
 229   }
 230   deriving (Eq, Show)
 231
 232 data Syl = Syl
 233   { sylText :: Text
 234   , sylDependsOnBefore :: Bool
 235   , sylDependsOnAfter :: Bool
 236   , sylDependsOnMeaning :: Bool
 237   , sylSound :: Text -- [IPA.Syllable []]
 238   , sylIndex :: Int
 239   , sylSilent :: Bool
 240   , sylSplit :: Bool
 241   }
 242   deriving (Eq, Show)
 243
 244 addIndexes :: [[Either Char Pron]] -> [[Syl]]
 245 addIndexes = go 0
 246   where
 247     go _idx [] = []
 248     go idx (prons : next) = List.reverse prons' : go idx' next
 249       where
 250         (idx', prons') =
 251           prons
 252             & List.foldl'
 253               ( \(i, is) -> \case
 254                   Left c ->
 255                     ( i
 256                     , Syl
 257                         { sylText = Text.singleton c
 258                         , sylDependsOnAfter = False
 259                         , sylDependsOnBefore = False
 260                         , sylDependsOnMeaning = False
 261                         , sylSound = []
 262                         , sylIndex = i
 263                         , sylSilent = True
 264                         , sylSplit = False
 265                         }
 266                         : is
 267                     )
 268                   Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
 269                     ps
 270                       & List.foldl'
 271                         ( \(j, js) (t, Pronunciation{..}) ->
 272                             let sylText = t & unRuleLexemes & lexemesChars & Text.pack
 273                             in case pronunciationIPABroad of
 274                                  []
 275                                    | not (Text.null pronunciationText) ->
 276                                        ( j + 1
 277                                        , Syl
 278                                            { sylText
 279                                            , sylSound = pronunciationText
 280                                            , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 281                                            , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 282                                            , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 283                                            , sylIndex = j + 1
 284                                            , sylSilent = False
 285                                            , sylSplit = False
 286                                            }
 287                                            : js
 288                                        )
 289                                  [IPA.Syllable []]
 290                                    | Text.null pronunciationText ->
 291                                        ( j
 292                                        , case js of
 293                                            [] ->
 294                                              Syl
 295                                                { sylText
 296                                                , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 297                                                , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 298                                                , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 299                                                , sylSound = ""
 300                                                , sylIndex = j
 301                                                , sylSilent = True
 302                                                , sylSplit = False
 303                                                }
 304                                                : js
 305                                            j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
 306                                        )
 307                                  syls
 308                                    | (syls & all IPA.isSilent) && Text.null pronunciationText ->
 309                                        ( j
 310                                        , Syl
 311                                            { sylText
 312                                            , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 313                                            , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 314                                            , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 315                                            , sylSound = ""
 316                                            , sylIndex = j
 317                                            , sylSilent = True
 318                                            , sylSplit = False
 319                                            }
 320                                            : js
 321                                        )
 322                                  _syls@[_] ->
 323                                    ( j + 1
 324                                    , Syl
 325                                        { sylText
 326                                        , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 327                                        , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 328                                        , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 329                                        , sylSound = pronunciationText
 330                                        , sylIndex = j + 1
 331                                        , sylSilent = False
 332                                        , sylSplit = False
 333                                        }
 334                                        : js
 335                                    )
 336                                  _syls@[_, _] ->
 337                                    ( j
 338                                    , Syl
 339                                        { sylText
 340                                        , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 341                                        , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 342                                        , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 343                                        , sylSound = pronunciationText & Text.replace "." ""
 344                                        , sylIndex = j
 345                                        , sylSilent = False
 346                                        , sylSplit = True
 347                                        }
 348                                        : js
 349                                    )
 350                                  syls -> errorShow syls
 351                         )
 352                         (i, is)
 353                     where
 354                       sylDependsOnAfter =
 355                         List.reverse >>> \case
 356                           LexemeBorder : _ -> True
 357                           LexemeSilent : _ -> True
 358                           LexemeConsonant : _ -> True
 359                           LexemeDoubleConsonant : _ -> True
 360                           LexemeVowel : _ -> True
 361                           LexemeSemiVowel : _ -> True
 362                           _ -> False
 363                       sylDependsOnBefore =
 364                         \case
 365                           LexemeBorder : _ -> True
 366                           LexemeSilent : _ -> True
 367                           LexemeConsonant : _ -> True
 368                           LexemeDoubleConsonant : _ -> True
 369                           LexemeVowel : _ -> True
 370                           LexemeSemiVowel : _ -> True
 371                           _ -> False
 372                       sylDependsOnMeaning =
 373                         List.reverse >>> \case
 374                           LexemeMeaning{} : _ -> True
 375                           _ -> False
 376               )
 377               (idx, [])
 378
 379 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
 380 withCapital =
 381   foldMap \(RuleLexemes pat, rul) ->
 382     [ (RuleLexemes pat, rul)
 383     ,
 384       ( RuleLexemes (withCapitalLexemes pat)
 385       , rul
 386           { rulePron = rul & rulePron & withCapitalPronunciations
 387           , ruleExamples =
 388               rul
 389                 & ruleExamples
 390                 & Map.mapKeys
 391                   ( unInputLexemes
 392                       >>> withCapitalLexemes
 393                       >>> InputLexemes
 394                   )
 395           }
 396       )
 397     ]
 398   where
 399     withCapitalPronunciations (Pronunciations []) = Pronunciations []
 400     withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
 401       Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
 402     withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
 403     withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
 404     withCapitalLexemes [] = []
 405
 406 lexemesChars :: [Lexeme] -> [Char]
 407 lexemesChars p =
 408   p & foldMap \case
 409     LexemeChar c -> [c]
 410     _ -> []
 411
 412 run ::
 413   Table ->
 414   Text ->
 415   Either
 416     ( Either
 417         (P.ParseErrorBundle Text ())
 418         (P.ParseErrorBundle [Lexeme] ())
 419     )
 420     [Either Char Pron]
 421 run rules inp =
 422   inp
 423     & runLexer
 424     & either (Left . Left) \lexs ->
 425       lexs
 426         & runParser rules
 427         & either (Left . Right) Right
 428
 429 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
 430 runParser tbl inp = inp & P.runParser (parser tbl) "input"
 431
 432 parseLiterals ::
 433   Table ->
 434   [ExampleLiteral] ->
 435   Either
 436     ( Either
 437         (P.ParseErrorBundle Text ())
 438         (P.ParseErrorBundle [Lexeme] ())
 439     )
 440     [Either Char Pron]
 441 parseLiterals rules inp =
 442   inp
 443     & traverse
 444       ( \ExampleLiteral{..} ->
 445           exampleLiteralText
 446             & TextShort.toText
 447             & runLexer
 448             <&> ( <>
 449                     [ LexemeMeaning exampleLiteralMeaning
 450                     | exampleLiteralMeaning & TextShort.null & not
 451                     ]
 452                 )
 453       )
 454     & either (Left . Left) \lexs ->
 455       lexs
 456         & mconcat
 457         & runParser rules
 458         & either (Left . Right) Right
 459
 460 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
 461 parser tbl = do
 462   res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
 463   P.eof
 464   return $ res & catMaybes
 465   where
 466     -- Match one of the rules, trying longuest first
 467     parseRules :: P.Parsec () [Lexeme] Pron
 468     parseRules =
 469       P.choice
 470         [ parseRule r
 471         | r <- tbl & Map.toDescList
 472         ]
 473     {-
 474     <|> P.choice
 475       [ parseRule
 476         ( RuleLexemes $
 477             rulePat & unRuleLexemes <&> \case
 478               LexemeChar c -> LexemeChar (c & Char.toUpper)
 479               x -> x
 480         , curRule
 481         )
 482       | (rulePat, curRule) <- tbl & Map.toDescList
 483       ]
 484     -}
 485     parseRule (rulePat, curRule@Rule{..}) =
 486       P.try do
 487         let
 488           pat = rulePat & unRuleLexemes
 489           patSep = \case
 490             LexemeVowel -> True
 491             LexemeSemiVowel -> True
 492             LexemeConsonant -> True
 493             LexemeSilent -> True
 494             LexemeAhead{} -> True
 495             _ -> False
 496           -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
 497           patBegin = pat & List.dropWhileEnd patSep
 498           patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
 499           patAhead =
 500             pat & List.takeWhile \case
 501               LexemeAhead{} -> True
 502               _ -> False
 503         -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
 504         P.chunk patBegin
 505         P.lookAhead do
 506           P.chunk $
 507             patAhead <&> \case
 508               LexemeAhead c -> LexemeChar c
 509               x -> x
 510         inpAfterBegin <- P.getInput
 511         unless (List.null patEnd) do
 512           inpWithAhead <- parseAhead
 513           -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
 514           P.setInput inpWithAhead
 515           P.chunk patEnd & void
 516         -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
 517         let lastSound =
 518               rulePron
 519                 & unPronunciations
 520                 & List.reverse
 521                 & headMaybe
 522                 & maybe
 523                   []
 524                   ( snd
 525                       >>> pronunciationIPABroad
 526                       >>> List.reverse
 527                       >>> headMaybe
 528                       >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
 529                   )
 530         P.setInput $ lastSound <> inpAfterBegin
 531         return Pron{pronInput = pat, pronRule = curRule}
 532     parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
 533     parseChar =
 534       P.anySingle <&> \case
 535         LexemeChar c -> Just $ Left c
 536         _ -> Nothing
 537     parseAhead :: P.Parsec () [Lexeme] [Lexeme]
 538     parseAhead = do
 539       nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
 540       -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
 541       case nextStep of
 542         Right (Right Pron{pronInput, pronRule}) -> do
 543           let x =
 544                 pronRule
 545                   & rulePron
 546                   & unPronunciations
 547                   & headMaybe
 548                   & maybe
 549                     []
 550                     ( snd
 551                         >>> pronunciationIPABroad
 552                         >>> headMaybe
 553                         >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
 554                     )
 555           inp <- P.getInput
 556           return $ x <> pronInput <> inp
 557         Right (Left lex) -> do
 558           parseAhead <&> (lex :)
 559         Left{} -> P.getInput
 560     lexemeHeadSound :: [_] -> [Lexeme]
 561     lexemeHeadSound =
 562       headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
 563         Just IPA.Zero{} -> [LexemeSilent]
 564         Just IPA.Vowel{} -> [LexemeVowel]
 565         Just (IPA.Consonant consonant) -> do
 566           case consonant of
 567             IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
 568             IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
 569             _ -> [LexemeConsonant]
 570         _ -> [] -- error
 571
 572 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
 573 runLexer inp = inp & P.runParser lexer "input"
 574
 575 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
 576 exampleLiteralsLexemes ls =
 577   ls & foldMap \ExampleLiteral{..} ->
 578     unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
 579       <> [ LexemeMeaning exampleLiteralMeaning
 580          ]
 581
 582 lexer :: P.Parsec () Text [Lexeme]
 583 lexer = do
 584   lls <- P.many do
 585     P.choice $
 586       list
 587         [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
 588             return [LexemeChar c | c <- cs & Text.unpack]
 589         , do
 590             cs <- P.takeWhile1P Nothing Char.isLetter
 591             mean <- (<|> return []) $ P.try do
 592               P.single '{'
 593               m <- P.takeWhile1P Nothing (/= '}')
 594               P.single '}'
 595               return [LexemeMeaning (TextShort.fromText m)]
 596             return $
 597               LexemeBorder
 598                 : [LexemeChar c | c <- cs & Text.unpack]
 599                   <> mean
 600                   <> [LexemeBorder]
 601         , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
 602             return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
 603         , P.satisfy Char.isSymbol >>= \c ->
 604             return [LexemeChar c]
 605         , P.satisfy Char.isSeparator >>= \c ->
 606             return [LexemeChar c]
 607         , P.satisfy Char.isMark >>= \c ->
 608             return [LexemeChar c]
 609         , P.satisfy Char.isPunctuation >>= \c ->
 610             return [LexemeChar c]
 611         ]
 612   P.eof
 613   return $ mconcat lls
 614
 615 words :: [Either Char Pron] -> [[Either Char Pron]]
 616 words [] = []
 617 words prons = word0 : words next
 618   where
 619     (word0, rest) = prons & List.span (isSep >>> not)
 620     (_sep, next) = rest & List.span isSep
 621     isSep = \case
 622       Left c | c & Char.isSpace -> True
 623       _ -> False
 624
 625 {-
 626 case statePats st & Map.lookup k of
 627   Nothing -> st
 628   Just (PatTree pats) ->
 629     loop st {statePats = pats, stateBuffer = k : stateBuffer st}
 630   Just (PatEnd end) ->
 631     loop st { statePats = initPats
 632        , stateBuffer = []
 633        , stateInput = stateInput st
 634            & LZ.insert Inp
 635               { inpPats = stateBuffer st & List.reverse
 636               , inpPronunciations = end
 637               }
 638            & LZ.right
 639        }
 640 -}
 641
 642 {-
 643 parse :: PatTree -> Text -> [Inp]
 644 parse initPats input =
 645   let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
 646   runInp [] initPats inpZip & LZ.toList
 647   where
 648   charToInp :: Char -> Inp
 649   charToInp c = Inp
 650     { inpPats = [PosNext (PatternChar c)]
 651     , inpPronunciations = []
 652     }
 653   runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
 654   runInp oks pat inp =
 655     traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
 656     case pat of
 657       PatEnd end ->
 658         -- the pattern ends
 659         inp
 660         & LZ.insert Inp
 661           { inpPats = oks & List.reverse
 662           , inpPronunciations = end
 663           }
 664         & LZ.right
 665         & runInp [] initPats
 666       PatTree pats ->
 667         -- the pattern may go on
 668         case inp & LZ.safeCursor of
 669           Nothing ->
 670             inp
 671               & runPat [] oks [PosNext PatternLexicalBorder] pats
 672           Just cur ->
 673             inp & LZ.delete
 674                 & runPat [] oks (inpPats cur & List.sort) pats
 675
 676   runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
 677   runPat kos oks todos pats inp =
 678     traceShow ( "runPat"::Text
 679               , ("kos"::Text) := kos
 680               , ("oks"::Text) := oks
 681               , ("todos"::Text) := todos
 682               , ("cur"::Text) :=LZ.safeCursor inp
 683               ) $
 684     case todos of
 685       [] | LZ.endp inp -> inp
 686         & runInp kos (PatEnd [])
 687         & runInp oks (PatEnd [])
 688       [] ->
 689         -- nothing left to advance the pattern
 690         --traceShow ("runPat/[]"::Text) $
 691           inp
 692             & (if null kos then id else runInp kos (PatEnd []))
 693             & runInp oks (PatTree pats)
 694       k:ks ->
 695         case pats & Map.lookup k of
 696           -- the pattern ends
 697           Just (PatEnd end) ->
 698             --traceShow ("runPat/End"::Text) $
 699             -- ks forgotten
 700             inp
 701               & (if null kos then id else runInp kos (PatEnd []))
 702               & runInp (k:oks) (PatEnd end)
 703           -- the pattern advances
 704           Just (PatTree nextPats) ->
 705             --traceShow ("runPat/Node"::Text) $
 706             inp & runPat kos (k:oks) ks nextPats
 707           -- the pattern does not advance
 708           Nothing ->
 709             inp & runPat (k:kos) oks ks pats
 710 -}
 711
 712 data Lexeme
 713   = LexemeBorder
 714   | LexemeVowel
 715   | LexemeSemiVowel
 716   | LexemeConsonant
 717   | LexemeDoubleConsonant
 718   | LexemeSilent
 719   | LexemeMeaning ShortText
 720   | -- | `LexemeChar` is last to have priority when using `Map.toDescList`
 721     LexemeChar Char
 722   | -- | `LexemeAhead` is last to have priority when using `Map.toDescList`
 723     LexemeAhead Char
 724   deriving (Eq, Ord, Show)
 725
 726 -- data Sound
 727 --   = SoundVowel
 728 --   | SoundConsonant
 729 --   deriving (Eq, Ord, Show)
 730
 731 {-
 732 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
 733   deriving (Eq, Ord, Show)
 734 instance P.Stream Lexemes where
 735   type Token Lexemes = Lexeme
 736   type Tokens Lexemes = Lexemes
 737   tokensToChunk _px = Lexemes
 738   chunkToTokens _px = unLexemes
 739   chunkLength _px = unLexemes >>> List.length
 740   chunkEmpty _px = unLexemes >>> List.null
 741   take1_ = unLexemes >>> P.take1_ >>> coerce
 742   takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
 743   takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
 744
 745 instance IsString Lexemes where
 746   fromString s =
 747     s
 748       & Text.pack
 749       & runLexer
 750       & either
 751           errorShow
 752           ((`appEndo` []) >>> Lexemes)
 753 -}
 754
 755 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
 756   deriving (Eq, Ord, Show)
 757 instance HasTypeDefault RuleLexemes where
 758   typeDefault = RuleLexemes typeDefault
 759 instance Semigroup RuleLexemes where
 760   RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
 761 instance Monoid RuleLexemes where
 762   mempty = RuleLexemes mempty
 763 instance IsList RuleLexemes where
 764   type Item RuleLexemes = Lexeme
 765   fromList = RuleLexemes
 766   toList = unRuleLexemes
 767 instance IsString RuleLexemes where
 768   fromString s =
 769     s
 770       & Text.pack
 771       & runLexer
 772       & either
 773         errorShow
 774         ( List.dropWhileEnd (== LexemeBorder)
 775             >>> List.dropWhile (== LexemeBorder)
 776             >>> RuleLexemes
 777         )
 778
 779 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
 780   deriving (Eq, Ord, Show)
 781 instance HasTypeDefault InputLexemes where
 782   typeDefault = InputLexemes typeDefault
 783 instance Semigroup InputLexemes where
 784   InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
 785 instance Monoid InputLexemes where
 786   mempty = InputLexemes mempty
 787 instance IsList InputLexemes where
 788   type Item InputLexemes = Lexeme
 789   fromList = InputLexemes
 790   toList = unInputLexemes
 791 instance IsString InputLexemes where
 792   fromString s =
 793     s
 794       & Text.pack
 795       & runLexer
 796       & either errorShow InputLexemes
 797
 798 instance P.ShowErrorComponent () where
 799   showErrorComponent = show
 800   errorComponentLen _ = 2
 801 instance P.VisualStream [Lexeme] where
 802   showTokens _s = show
 803   tokensLength _s xs = xs <&> (show >>> List.length) & sum
 804 instance P.TraversableStream [Lexeme] where
 805   reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
 806
 807 data LexemeTag
 808   = LexemeTagLetter
 809   | LexemeTagSpace
 810   | LexemeTagPunctuation
 811   | LexemeTagSeparator
 812   | LexemeTagMark
 813   | LexemeTagSymbol
 814   | LexemeTagDefinition
 815   | LexemeTagBorder
 816   deriving (Eq, Ord, Show)
 817 deriving instance Ord (IPA.Syllable [])
 818 deriving instance Ord IPA.SuprasegmentalFeature
 819 deriving instance Ord IPA.SegmentalFeature
 820 deriving instance Ord IPA.Sibilance
 821 deriving instance Ord IPA.Manner
 822 deriving instance Ord IPA.Phonation
 823 deriving instance Ord IPA.Roundedness
 824 deriving instance Ord IPA.Height
 825 deriving instance Ord IPA.Vowel
 826 deriving instance Ord IPA.Consonant
 827 deriving instance Ord IPA.Segment
 828
 829 {-
 830 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
 831 tableToMatch tbl = loop
 832   where
 833   loop prevBorder = \case
 834     InputText inp ->
 835       [ (matchingLength, )
 836       | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
 837       , let matchingLength = transMatchingLength input trans
 838       , 0 < matchingLength || not (isTransConsume trans)
 839       , let (inputRead, inputRest) = input & Text.splitAt matchingLength
 840       ]
 841         & Map.fromListWith (\new old -> old)
 842         & Map.lookupMax
 843         <&> snd
 844
 845 -}
 846 tableHtml :: Table -> IO HTML.Html
 847 tableHtml tbl = do
 848   dataPath <- Self.getDataDir <&> File.normalise
 849   let title :: String = "LexerDict"
 850   let pageOrientation = Paper.PageOrientationPortrait
 851   let pageSize = Paper.PageSizeA4
 852   let partLangue = LangueFrançais
 853   return do
 854     HTML.docTypeHtml do
 855       HTML.head do
 856         HTML.title $ title & HTML.toHtml
 857         forM_
 858           ( [ "styles/Paper.css"
 859             , "styles/French/Lexer.css"
 860             , "styles/Rosetta/Reading.css"
 861             ]
 862               & list
 863           )
 864           \cssFile ->
 865             HTML.link
 866               ! HA.rel "stylesheet"
 867               ! HA.type_ "text/css"
 868               ! HA.href (dataPath </> cssFile & HTML.toValue)
 869         HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
 870       -- HTML.styleCSS $ pageDifficulties & difficultyCSS
 871       HTML.body
 872         ! classes ["A4", "french-lexer"]
 873         $ do
 874           "\n"
 875           let rulesChunks = tbl & Map.toList & chunksOf 50
 876           forM_ rulesChunks \rules ->
 877             HTML.section
 878               ! classes
 879                 [ "sheet"
 880                 ]
 881               ! styles
 882                 []
 883               $ do
 884                 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
 885                   "\n"
 886                   HTML.div
 887                     ! classes
 888                       [ "dict-entry"
 889                       , if even ruleIndex then "even" else "odd"
 890                       ]
 891                     ! styles
 892                       []
 893                     $ do
 894                       "\n"
 895                       HTML.div
 896                         ! classes
 897                           [ "dict-key"
 898                           , "lang-" <> className partLangue
 899                           ]
 900                         ! styles
 901                           []
 902                         -- "grid-template-columns" :=
 903                         --  (0.5 & cm & HTML.toCSS)
 904                         --    & List.replicate lexerDictMaxKeyLength
 905                         --    & List.unwords
 906
 907                         $ do
 908                           forM_ (["model"] :: [String]) \rowKind -> do
 909                             forM_ (rulePat & unRuleLexemes) \ruleChar -> do
 910                               -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
 911                               case ruleChar of
 912                                 LexemeChar c -> do
 913                                   HTML.span
 914                                     ! classes
 915                                       [ "dict-key-cell"
 916                                       , rowKind
 917                                       -- , "script-" <> className uniScript
 918                                       ]
 919                                     $ do
 920                                       c & HTML.toHtml
 921                                 _ -> ""
 922                       HTML.div
 923                         ! classes
 924                           [ "dict-pronunciation"
 925                           ]
 926                         $ do
 927                           -- HTML.span ! classes ["arrow"] $ "→"
 928                           case rulePron of
 929                             Pronunciations
 930                               { unPronunciations =
 931                                 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
 932                               } -> ""
 933                             Pronunciations{unPronunciations = is} ->
 934                               is
 935                                 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
 936                                 & HTML.toHtml
 937                       HTML.div
 938                         ! classes
 939                           [ "dict-lexeme"
 940                           ]
 941                         $ do
 942                           -- HTML.span ! classes ["arrow"] $ "→"
 943                           forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
 944                             HTML.span
 945                               ! classes
 946                                 []
 947                               $ do
 948                                 case pronunciationIPABroad of
 949                                   [] -> pronunciationText & HTML.toHtml
 950                                   _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
 951                             "; "