src/Language/Pronunciation.hs

   1 {-# LANGUAGE OverloadedLists #-}
   2 {-# LANGUAGE OverloadedStrings #-}
   3 {-# LANGUAGE RankNTypes #-}
   4 {-# LANGUAGE StandaloneDeriving #-}
   5
   6 module Language.Pronunciation where
   7
   8 import Control.Applicative (Alternative (..))
   9 import Control.Monad.Combinators qualified as P
  10 import Control.Monad.Trans.Class qualified as MT
  11 import Control.Monad.Trans.State qualified as MT
  12 import Data.List qualified as List
  13 import Data.List.Zipper qualified as LZ
  14 import Data.Map.Strict qualified as Map
  15 import Data.Set qualified as Set
  16 import Data.Text qualified as Text
  17 import Data.Text.Short qualified as TextShort
  18 import Data.Traversable (traverse)
  19 import Language
  20 import Paths_worksheets qualified as Self
  21 import System.FilePath.Posix ((</>))
  22 import System.FilePath.Posix qualified as File
  23 import Text.Blaze.Html5.Attributes qualified as HA
  24 import Text.Megaparsec qualified as P
  25 import Worksheets.Utils.Char qualified as Char
  26 import Worksheets.Utils.HTML (className, classes, styles, (!))
  27 import Worksheets.Utils.HTML qualified as HTML
  28 import Worksheets.Utils.IPA qualified as IPA
  29 import Worksheets.Utils.Paper qualified as Paper
  30 import Worksheets.Utils.Prelude
  31
  32 data Pronunciation = Pronunciation
  33   { pronunciationIPABroad :: [IPA.Syllable []]
  34   , pronunciationText :: Text
  35   }
  36   deriving (Eq, Ord, Show)
  37 instance Semigroup Pronunciation where
  38   x <> y =
  39     Pronunciation
  40       { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
  41       , pronunciationText =
  42           [pronunciationText x, pronunciationText y]
  43             & List.filter (/= "")
  44             & Text.intercalate "."
  45       }
  46 instance IsList Pronunciation where
  47   type Item Pronunciation = IPA.Syllable []
  48   toList = pronunciationIPABroad
  49
  50   -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
  51   fromList l =
  52     Pronunciation
  53       { pronunciationIPABroad = ipa
  54       , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
  55       }
  56     where
  57       ipa =
  58         l
  59           & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
  60           & fromList
  61 instance IsString Pronunciation where
  62   fromString s =
  63     Pronunciation
  64       { pronunciationIPABroad = ipa
  65       , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
  66       }
  67     where
  68       ipa =
  69         s
  70           & Text.pack
  71           & IPA.parseSyllables @[]
  72           & either errorShow id
  73
  74 {-
  75 data PronunciationKey = PronunciationKey
  76   { pronunciationKeyText :: Text
  77   , pronunciationKeyPron :: Pronunciation
  78   , pronunciationKey
  79   }
  80 -}
  81
  82 newtype Pronunciations = Pronunciations
  83   { unPronunciations :: [(RuleLexemes, Pronunciation)]
  84   }
  85   deriving (Eq, Ord)
  86   deriving newtype (Show)
  87   deriving newtype (Semigroup)
  88   deriving newtype (Monoid)
  89 joinPronunciations :: Pronunciations -> Pronunciations
  90 joinPronunciations (Pronunciations ps) =
  91   Pronunciations
  92     [ input :=
  93         Pronunciation
  94           { pronunciationIPABroad = ipa
  95           , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
  96           }
  97     ]
  98   where
  99     input :: RuleLexemes
 100     ipa :: [IPA.Syllable []]
 101     (input, ipa) =
 102       ps
 103         & List.foldr
 104           ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
 105               case pronunciationIPABroad of
 106                 [] -> (inp <> suffix, IPA.Syllable [], acc)
 107                 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
 108                 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
 109                 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
 110                 _ -> errorShow pronunciationIPABroad
 111           )
 112           ( ""
 113           , IPA.Syllable []
 114           , []
 115           )
 116         & ( \(i, l, acc) ->
 117               ( i
 118               , glueSyllableToTheRight l acc
 119                   & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
 120               )
 121           )
 122     glueSyllableToTheRight ::
 123       IPA.Syllable [] ->
 124       [IPA.Syllable []] ->
 125       [IPA.Syllable []]
 126     glueSyllableToTheRight x y =
 127       case y of
 128         [] -> [x]
 129         [yL] -> [x <> yL]
 130         yL : yR -> x <> yL : yR
 131 instance IsList Pronunciations where
 132   type Item Pronunciations = (RuleLexemes, Pronunciation)
 133   toList = unPronunciations
 134   fromList l = Pronunciations{unPronunciations = l & fromList}
 135
 136 {-
 137 instance IsString Pronunciations where
 138   fromString = \case
 139     "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
 140     s -> Pronunciations (s & Text.pack) $
 141           fromString s
 142             & IPA.parseSyllables @[]
 143             & either errorShow id
 144 -}
 145 data ExampleLiteral = ExampleLiteral
 146   { exampleLiteralText :: ShortText
 147   , exampleLiteralTags :: Set LiteralTag
 148   , exampleLiteralMeaning :: ShortText
 149   }
 150   deriving (Eq, Ord, Show)
 151 instance IsString ExampleLiteral where
 152   fromString s =
 153     ExampleLiteral
 154       { exampleLiteralText = s & fromString
 155       , exampleLiteralTags = Set.empty
 156       , exampleLiteralMeaning = ""
 157       }
 158 data LiteralTag
 159   = LiteralTagOccurence
 160   | LiteralTagMeta
 161   | LiteralTagSilent
 162   deriving (Eq, Ord, Show)
 163 exampleLiteralsText :: [ExampleLiteral] -> ShortText
 164 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
 165
 166 hyphen =
 167   ExampleLiteral
 168     { exampleLiteralText = "-"
 169     , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
 170     , exampleLiteralMeaning = ""
 171     }
 172 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
 173 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
 174
 175 type SyllableText = ShortText
 176 type SyllableBroad = IPA.Syllable []
 177 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
 178
 179 data Adjacent
 180   = AdjacentBorder
 181   | AdjacentVowel
 182   | AdjacentConsonant
 183   deriving (Eq, Ord, Show)
 184 data Variant
 185   = VariantDefinition Text
 186   | VariantStress
 187   deriving (Eq, Ord, Show)
 188 data Rule = Rule
 189   { -- , ruleStress :: Bool
 190     -- , ruleDefinition :: Maybe Text
 191
 192     -- [ "e"    := ["ɛ"] , "x"    := ["g","z"] , "er"   := ["ɛʁ"] , "cice" := ["sis"] ]
 193     -- [ "exercice" := ["ɛg.zɛʁ.sis"]
 194     -- ]
 195     rulePron :: Pronunciations
 196   , ruleExamples :: Map InputLexemes Pronunciation
 197   }
 198   deriving (Eq, Ord, Show)
 199
 200 rule =
 201   Rule
 202     { rulePron = Pronunciations{unPronunciations = []}
 203     , ruleExamples = mempty
 204     }
 205 space = rule
 206 word
 207   , begining
 208   , ending ::
 209     RuleLexemes -> RuleLexemes
 210 word = begining >>> ending
 211 begining = after [LexemeBorder]
 212 ending = before [LexemeBorder]
 213 before ls r = RuleLexemes (unRuleLexemes r <> ls)
 214 after ls r = RuleLexemes (ls <> unRuleLexemes r)
 215 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
 216
 217 type Table = Map RuleLexemes Rule
 218 examples :: Table -> Map InputLexemes Pronunciation
 219 examples tbl =
 220   [ v & ruleExamples
 221   | v <- tbl & Map.elems
 222   ]
 223     & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
 224
 225 data Pron = Pron
 226   { pronInput :: [Lexeme]
 227   , pronRule :: Rule
 228   }
 229   deriving (Eq, Show)
 230
 231 data Syl = Syl
 232   { sylText :: Text
 233   , sylDependsOnBefore :: Bool
 234   , sylDependsOnAfter :: Bool
 235   , sylDependsOnMeaning :: Bool
 236   , sylSound :: Text -- [IPA.Syllable []]
 237   , sylIndex :: Int
 238   , sylSilent :: Bool
 239   , sylSplit :: Bool
 240   }
 241
 242 addIndexes :: [[Either Char Pron]] -> [[Syl]]
 243 addIndexes = go 0
 244   where
 245     go _idx [] = []
 246     go idx (prons : next) = List.reverse prons' : go idx' next
 247       where
 248         (idx', prons') =
 249           prons
 250             & List.foldl'
 251               ( \(i, is) -> \case
 252                   Left c ->
 253                     ( i
 254                     , Syl
 255                         { sylText = Text.singleton c
 256                         , sylDependsOnAfter = False
 257                         , sylDependsOnBefore = False
 258                         , sylDependsOnMeaning = False
 259                         , sylSound = []
 260                         , sylIndex = i
 261                         , sylSilent = True
 262                         , sylSplit = False
 263                         }
 264                         : is
 265                     )
 266                   Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
 267                     ps
 268                       & List.foldl'
 269                         ( \(j, js) (t, Pronunciation{..}) ->
 270                             let sylText = t & unRuleLexemes & lexemesChars & Text.pack
 271                             in case pronunciationIPABroad of
 272                                 []
 273                                   | not (Text.null pronunciationText) ->
 274                                       ( j + 1
 275                                       , Syl
 276                                           { sylText
 277                                           , sylSound = pronunciationText
 278                                           , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 279                                           , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 280                                           , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 281                                           , sylIndex = j + 1
 282                                           , sylSilent = False
 283                                           , sylSplit = False
 284                                           }
 285                                           : js
 286                                       )
 287                                 [IPA.Syllable []]
 288                                   | Text.null pronunciationText ->
 289                                       ( j
 290                                       , case js of
 291                                           [] ->
 292                                             Syl
 293                                               { sylText
 294                                               , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 295                                               , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 296                                               , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 297                                               , sylSound = ""
 298                                               , sylIndex = j
 299                                               , sylSilent = True
 300                                               , sylSplit = False
 301                                               }
 302                                               : js
 303                                           j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
 304                                       )
 305                                 syls
 306                                   | (syls & all IPA.isSilent) && Text.null pronunciationText ->
 307                                       ( j
 308                                       , Syl
 309                                           { sylText
 310                                           , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 311                                           , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 312                                           , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 313                                           , sylSound = ""
 314                                           , sylIndex = j
 315                                           , sylSilent = True
 316                                           , sylSplit = False
 317                                           }
 318                                           : js
 319                                       )
 320                                 _syls@[_] ->
 321                                   ( j + 1
 322                                   , Syl
 323                                       { sylText
 324                                       , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 325                                       , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 326                                       , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 327                                       , sylSound = pronunciationText
 328                                       , sylIndex = j + 1
 329                                       , sylSilent = False
 330                                       , sylSplit = False
 331                                       }
 332                                       : js
 333                                   )
 334                                 _syls@[_, _] ->
 335                                   ( j
 336                                   , Syl
 337                                       { sylText
 338                                       , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 339                                       , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 340                                       , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 341                                       , sylSound = pronunciationText
 342                                       , sylIndex = j
 343                                       , sylSilent = False
 344                                       , sylSplit = True
 345                                       }
 346                                       : js
 347                                   )
 348                                 syls -> errorShow syls
 349                         )
 350                         (i, is)
 351                     where
 352                       sylDependsOnAfter =
 353                         List.reverse >>> \case
 354                           LexemeBorder : _ -> True
 355                           LexemeSilent : _ -> True
 356                           LexemeConsonant : _ -> True
 357                           LexemeDoubleConsonant : _ -> True
 358                           LexemeVowel : _ -> True
 359                           LexemeSemiVowel : _ -> True
 360                           _ -> False
 361                       sylDependsOnBefore =
 362                         \case
 363                           LexemeBorder : _ -> True
 364                           LexemeSilent : _ -> True
 365                           LexemeConsonant : _ -> True
 366                           LexemeDoubleConsonant : _ -> True
 367                           LexemeVowel : _ -> True
 368                           LexemeSemiVowel : _ -> True
 369                           _ -> False
 370                       sylDependsOnMeaning =
 371                         List.reverse >>> \case
 372                           LexemeMeaning{} : _ -> True
 373                           _ -> False
 374               )
 375               (idx, [])
 376
 377 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
 378 withCapital =
 379   foldMap \(RuleLexemes pat, rul) ->
 380     [ (RuleLexemes pat, rul)
 381     ,
 382       ( RuleLexemes (withCapitalLexemes pat)
 383       , rul
 384           { rulePron = rul & rulePron & withCapitalPronunciations
 385           , ruleExamples =
 386               rul
 387                 & ruleExamples
 388                 & Map.mapKeys
 389                   ( unInputLexemes
 390                       >>> withCapitalLexemes
 391                       >>> InputLexemes
 392                   )
 393           }
 394       )
 395     ]
 396   where
 397     withCapitalPronunciations (Pronunciations []) = Pronunciations []
 398     withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
 399       Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
 400     withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
 401     withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
 402     withCapitalLexemes [] = []
 403
 404 lexemesChars :: [Lexeme] -> [Char]
 405 lexemesChars p =
 406   p & foldMap \case
 407     LexemeChar c -> [c]
 408     _ -> []
 409
 410 run ::
 411   Table ->
 412   Text ->
 413   Either
 414     ( Either
 415         (P.ParseErrorBundle Text ())
 416         (P.ParseErrorBundle [Lexeme] ())
 417     )
 418     [Either Char Pron]
 419 run rules inp =
 420   inp
 421     & runLexer
 422     & either (Left . Left) \lexs ->
 423       lexs
 424         & runParser rules
 425         & either (Left . Right) Right
 426
 427 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
 428 runParser tbl inp = inp & P.runParser (parser tbl) "input"
 429
 430 parseLiterals ::
 431   Table ->
 432   [ExampleLiteral] ->
 433   Either
 434     ( Either
 435         (P.ParseErrorBundle Text ())
 436         (P.ParseErrorBundle [Lexeme] ())
 437     )
 438     [Either Char Pron]
 439 parseLiterals rules inp =
 440   inp
 441     & traverse
 442       ( \ExampleLiteral{..} ->
 443           exampleLiteralText
 444             & TextShort.toText
 445             & runLexer
 446             <&> ( <>
 447                     [ LexemeMeaning exampleLiteralMeaning
 448                     | exampleLiteralMeaning & TextShort.null & not
 449                     ]
 450                 )
 451       )
 452     & either (Left . Left) \lexs ->
 453       lexs
 454         & mconcat
 455         & runParser rules
 456         & either (Left . Right) Right
 457
 458 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
 459 parser tbl = do
 460   res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
 461   P.eof
 462   return $ res & catMaybes
 463   where
 464     -- Match one of the rules, trying longuest first
 465     parseRules :: P.Parsec () [Lexeme] Pron
 466     parseRules =
 467       P.choice
 468         [ parseRule r
 469         | r <- tbl & Map.toDescList
 470         ]
 471     {-
 472     <|> P.choice
 473       [ parseRule
 474         ( RuleLexemes $
 475             rulePat & unRuleLexemes <&> \case
 476               LexemeChar c -> LexemeChar (c & Char.toUpper)
 477               x -> x
 478         , curRule
 479         )
 480       | (rulePat, curRule) <- tbl & Map.toDescList
 481       ]
 482     -}
 483     parseRule (rulePat, curRule@Rule{..}) =
 484       P.try do
 485         let
 486           pat = rulePat & unRuleLexemes
 487           patSep = (`List.elem` list [LexemeVowel, LexemeSemiVowel, LexemeConsonant, LexemeSilent])
 488           -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
 489           patBegin = pat & List.dropWhileEnd patSep
 490           patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
 491         -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
 492         P.chunk patBegin
 493         inpAfterBegin <- P.getInput
 494         unless (List.null patEnd) do
 495           inpWithAhead <- parseAhead
 496           -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
 497           P.setInput inpWithAhead
 498           P.chunk patEnd & void
 499         -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
 500         let lastSound =
 501               rulePron
 502                 & unPronunciations
 503                 & List.reverse
 504                 & headMaybe
 505                 & maybe
 506                   []
 507                   ( snd
 508                       >>> pronunciationIPABroad
 509                       >>> List.reverse
 510                       >>> headMaybe
 511                       >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
 512                   )
 513         P.setInput $ lastSound <> inpAfterBegin
 514         return Pron{pronInput = pat, pronRule = curRule}
 515     parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
 516     parseChar =
 517       P.anySingle <&> \case
 518         LexemeChar c -> Just $ Left c
 519         _ -> Nothing
 520     parseAhead :: P.Parsec () [Lexeme] [Lexeme]
 521     parseAhead = do
 522       nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
 523       -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
 524       case nextStep of
 525         Right (Right Pron{pronInput, pronRule}) -> do
 526           let x =
 527                 pronRule
 528                   & rulePron
 529                   & unPronunciations
 530                   & headMaybe
 531                   & maybe
 532                     []
 533                     ( snd
 534                         >>> pronunciationIPABroad
 535                         >>> headMaybe
 536                         >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
 537                     )
 538           inp <- P.getInput
 539           return $ x <> pronInput <> inp
 540         Right (Left lex) -> do
 541           parseAhead <&> (lex :)
 542         Left{} -> P.getInput
 543     lexemeHeadSound :: [_] -> [Lexeme]
 544     lexemeHeadSound =
 545       headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
 546         Just IPA.Zero{} -> [LexemeSilent]
 547         Just IPA.Vowel{} -> [LexemeVowel]
 548         Just (IPA.Consonant consonant) -> do
 549           case consonant of
 550             IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
 551             IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
 552             _ -> [LexemeConsonant]
 553         _ -> [] -- error
 554
 555 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
 556 runLexer inp = inp & P.runParser lexer "input"
 557
 558 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
 559 exampleLiteralsLexemes ls =
 560   ls & foldMap \ExampleLiteral{..} ->
 561     unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
 562       <> [ LexemeMeaning exampleLiteralMeaning
 563          ]
 564
 565 lexer :: P.Parsec () Text [Lexeme]
 566 lexer = do
 567   lls <- P.many do
 568     P.choice $
 569       list
 570         [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
 571             return [LexemeChar c | c <- cs & Text.unpack]
 572         , do
 573             cs <- P.takeWhile1P Nothing Char.isLetter
 574             mean <- (<|> return []) $ P.try do
 575               P.single '{'
 576               m <- P.takeWhile1P Nothing (/= '}')
 577               P.single '}'
 578               return [LexemeMeaning (TextShort.fromText m)]
 579             return $
 580               LexemeBorder
 581                 : [LexemeChar c | c <- cs & Text.unpack]
 582                   <> mean
 583                   <> [LexemeBorder]
 584         , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
 585             return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
 586         , P.satisfy Char.isSymbol >>= \c ->
 587             return [LexemeChar c]
 588         , P.satisfy Char.isSeparator >>= \c ->
 589             return [LexemeChar c]
 590         , P.satisfy Char.isMark >>= \c ->
 591             return [LexemeChar c]
 592         , P.satisfy Char.isPunctuation >>= \c ->
 593             return [LexemeChar c]
 594         ]
 595   P.eof
 596   return $ mconcat lls
 597
 598 words :: [Either Char Pron] -> [[Either Char Pron]]
 599 words [] = []
 600 words prons = word0 : words next
 601   where
 602     (word0, rest) = prons & List.span (isSep >>> not)
 603     (_sep, next) = rest & List.span isSep
 604     isSep = \case
 605       Left c | c & Char.isSpace -> True
 606       _ -> False
 607
 608
 609 {-
 610 case statePats st & Map.lookup k of
 611   Nothing -> st
 612   Just (PatTree pats) ->
 613     loop st {statePats = pats, stateBuffer = k : stateBuffer st}
 614   Just (PatEnd end) ->
 615     loop st { statePats = initPats
 616        , stateBuffer = []
 617        , stateInput = stateInput st
 618            & LZ.insert Inp
 619               { inpPats = stateBuffer st & List.reverse
 620               , inpPronunciations = end
 621               }
 622            & LZ.right
 623        }
 624 -}
 625
 626 {-
 627 parse :: PatTree -> Text -> [Inp]
 628 parse initPats input =
 629   let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
 630   runInp [] initPats inpZip & LZ.toList
 631   where
 632   charToInp :: Char -> Inp
 633   charToInp c = Inp
 634     { inpPats = [PosNext (PatternChar c)]
 635     , inpPronunciations = []
 636     }
 637   runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
 638   runInp oks pat inp =
 639     traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
 640     case pat of
 641       PatEnd end ->
 642         -- the pattern ends
 643         inp
 644         & LZ.insert Inp
 645           { inpPats = oks & List.reverse
 646           , inpPronunciations = end
 647           }
 648         & LZ.right
 649         & runInp [] initPats
 650       PatTree pats ->
 651         -- the pattern may go on
 652         case inp & LZ.safeCursor of
 653           Nothing ->
 654             inp
 655               & runPat [] oks [PosNext PatternLexicalBorder] pats
 656           Just cur ->
 657             inp & LZ.delete
 658                 & runPat [] oks (inpPats cur & List.sort) pats
 659
 660   runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
 661   runPat kos oks todos pats inp =
 662     traceShow ( "runPat"::Text
 663               , ("kos"::Text) := kos
 664               , ("oks"::Text) := oks
 665               , ("todos"::Text) := todos
 666               , ("cur"::Text) :=LZ.safeCursor inp
 667               ) $
 668     case todos of
 669       [] | LZ.endp inp -> inp
 670         & runInp kos (PatEnd [])
 671         & runInp oks (PatEnd [])
 672       [] ->
 673         -- nothing left to advance the pattern
 674         --traceShow ("runPat/[]"::Text) $
 675           inp
 676             & (if null kos then id else runInp kos (PatEnd []))
 677             & runInp oks (PatTree pats)
 678       k:ks ->
 679         case pats & Map.lookup k of
 680           -- the pattern ends
 681           Just (PatEnd end) ->
 682             --traceShow ("runPat/End"::Text) $
 683             -- ks forgotten
 684             inp
 685               & (if null kos then id else runInp kos (PatEnd []))
 686               & runInp (k:oks) (PatEnd end)
 687           -- the pattern advances
 688           Just (PatTree nextPats) ->
 689             --traceShow ("runPat/Node"::Text) $
 690             inp & runPat kos (k:oks) ks nextPats
 691           -- the pattern does not advance
 692           Nothing ->
 693             inp & runPat (k:kos) oks ks pats
 694 -}
 695
 696
 697 data Lexeme
 698   = LexemeBorder
 699   | LexemeVowel
 700   | LexemeSemiVowel
 701   | LexemeConsonant
 702   | LexemeDoubleConsonant
 703   | LexemeSilent
 704   | LexemeMeaning ShortText
 705   | -- | `LexemeChar` is last to have priority when using `Map.toDescList`
 706     LexemeChar Char
 707   deriving (Eq, Ord, Show)
 708
 709 -- data Sound
 710 --   = SoundVowel
 711 --   | SoundConsonant
 712 --   deriving (Eq, Ord, Show)
 713
 714 {-
 715 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
 716   deriving (Eq, Ord, Show)
 717 instance P.Stream Lexemes where
 718   type Token Lexemes = Lexeme
 719   type Tokens Lexemes = Lexemes
 720   tokensToChunk _px = Lexemes
 721   chunkToTokens _px = unLexemes
 722   chunkLength _px = unLexemes >>> List.length
 723   chunkEmpty _px = unLexemes >>> List.null
 724   take1_ = unLexemes >>> P.take1_ >>> coerce
 725   takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
 726   takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
 727
 728 instance IsString Lexemes where
 729   fromString s =
 730     s
 731       & Text.pack
 732       & runLexer
 733       & either
 734           errorShow
 735           ((`appEndo` []) >>> Lexemes)
 736 -}
 737
 738 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
 739   deriving (Eq, Ord, Show)
 740 instance HasTypeDefault RuleLexemes where
 741   typeDefault = RuleLexemes typeDefault
 742 instance Semigroup RuleLexemes where
 743   RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
 744 instance Monoid RuleLexemes where
 745   mempty = RuleLexemes mempty
 746 instance IsList RuleLexemes where
 747   type Item RuleLexemes = Lexeme
 748   fromList = RuleLexemes
 749   toList = unRuleLexemes
 750 instance IsString RuleLexemes where
 751   fromString s =
 752     s
 753       & Text.pack
 754       & runLexer
 755       & either
 756         errorShow
 757         ( List.dropWhileEnd (== LexemeBorder)
 758             >>> List.dropWhile (== LexemeBorder)
 759             >>> RuleLexemes
 760         )
 761
 762 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
 763   deriving (Eq, Ord, Show)
 764 instance HasTypeDefault InputLexemes where
 765   typeDefault = InputLexemes typeDefault
 766 instance Semigroup InputLexemes where
 767   InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
 768 instance Monoid InputLexemes where
 769   mempty = InputLexemes mempty
 770 instance IsList InputLexemes where
 771   type Item InputLexemes = Lexeme
 772   fromList = InputLexemes
 773   toList = unInputLexemes
 774 instance IsString InputLexemes where
 775   fromString s =
 776     s
 777       & Text.pack
 778       & runLexer
 779       & either errorShow InputLexemes
 780
 781 instance P.ShowErrorComponent () where
 782   showErrorComponent = show
 783   errorComponentLen _ = 2
 784 instance P.VisualStream [Lexeme] where
 785   showTokens _s = show
 786   tokensLength _s xs = xs <&> (show >>> List.length) & sum
 787 instance P.TraversableStream [Lexeme] where
 788   reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
 789
 790 data LexemeTag
 791   = LexemeTagLetter
 792   | LexemeTagSpace
 793   | LexemeTagPunctuation
 794   | LexemeTagSeparator
 795   | LexemeTagMark
 796   | LexemeTagSymbol
 797   | LexemeTagDefinition
 798   | LexemeTagBorder
 799   deriving (Eq, Ord, Show)
 800 deriving instance Ord (IPA.Syllable [])
 801 deriving instance Ord IPA.SuprasegmentalFeature
 802 deriving instance Ord IPA.SegmentalFeature
 803 deriving instance Ord IPA.Sibilance
 804 deriving instance Ord IPA.Manner
 805 deriving instance Ord IPA.Phonation
 806 deriving instance Ord IPA.Roundedness
 807 deriving instance Ord IPA.Height
 808 deriving instance Ord IPA.Vowel
 809 deriving instance Ord IPA.Consonant
 810 deriving instance Ord IPA.Segment
 811
 812 {-
 813 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
 814 tableToMatch tbl = loop
 815   where
 816   loop prevBorder = \case
 817     InputText inp ->
 818       [ (matchingLength, )
 819       | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
 820       , let matchingLength = transMatchingLength input trans
 821       , 0 < matchingLength || not (isTransConsume trans)
 822       , let (inputRead, inputRest) = input & Text.splitAt matchingLength
 823       ]
 824         & Map.fromListWith (\new old -> old)
 825         & Map.lookupMax
 826         <&> snd
 827
 828 -}
 829 tableHtml :: Table -> IO HTML.Html
 830 tableHtml tbl = do
 831   dataPath <- Self.getDataDir <&> File.normalise
 832   let title :: String = "LexerDict"
 833   let pageOrientation = Paper.PageOrientationPortrait
 834   let pageSize = Paper.PageSizeA4
 835   let partLangue = LangueFrançais
 836   return do
 837     HTML.docTypeHtml do
 838       HTML.head do
 839         HTML.title $ title & HTML.toHtml
 840         forM_
 841           ( [ "styles/Paper.css"
 842             , "styles/French/Lexer.css"
 843             , "styles/Rosetta/Reading.css"
 844             ]
 845               & list
 846           )
 847           \cssFile ->
 848             HTML.link
 849               ! HA.rel "stylesheet"
 850               ! HA.type_ "text/css"
 851               ! HA.href (dataPath </> cssFile & HTML.toValue)
 852         HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
 853       -- HTML.styleCSS $ pageDifficulties & difficultyCSS
 854       HTML.body
 855         ! classes ["A4", "french-lexer"]
 856         $ do
 857           "\n"
 858           let rulesChunks = tbl & Map.toList & chunksOf 50
 859           forM_ rulesChunks \rules ->
 860             HTML.section
 861               ! classes
 862                 [ "sheet"
 863                 ]
 864               ! styles
 865                 []
 866               $ do
 867                 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
 868                   "\n"
 869                   HTML.div
 870                     ! classes
 871                       [ "dict-entry"
 872                       , if even ruleIndex then "even" else "odd"
 873                       ]
 874                     ! styles
 875                       []
 876                     $ do
 877                       "\n"
 878                       HTML.div
 879                         ! classes
 880                           [ "dict-key"
 881                           , "lang-" <> className partLangue
 882                           ]
 883                         ! styles
 884                           []
 885                         -- "grid-template-columns" :=
 886                         --  (0.5 & cm & HTML.toCSS)
 887                         --    & List.replicate lexerDictMaxKeyLength
 888                         --    & List.unwords
 889
 890                         $ do
 891                           forM_ (["model"] :: [String]) \rowKind -> do
 892                             forM_ (rulePat & unRuleLexemes) \ruleChar -> do
 893                               -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
 894                               case ruleChar of
 895                                 LexemeChar c -> do
 896                                   HTML.span
 897                                     ! classes
 898                                       [ "dict-key-cell"
 899                                       , rowKind
 900                                       -- , "script-" <> className uniScript
 901                                       ]
 902                                     $ do
 903                                       c & HTML.toHtml
 904                                 _ -> ""
 905                       HTML.div
 906                         ! classes
 907                           [ "dict-pronunciation"
 908                           ]
 909                         $ do
 910                           -- HTML.span ! classes ["arrow"] $ "→"
 911                           case rulePron of
 912                             Pronunciations
 913                               { unPronunciations =
 914                                 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
 915                               } -> ""
 916                             Pronunciations{unPronunciations = is} ->
 917                               is
 918                                 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
 919                                 & HTML.toHtml
 920                       HTML.div
 921                         ! classes
 922                           [ "dict-lexeme"
 923                           ]
 924                         $ do
 925                           -- HTML.span ! classes ["arrow"] $ "→"
 926                           forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
 927                             HTML.span
 928                               ! classes
 929                                 []
 930                               $ do
 931                                 case pronunciationIPABroad of
 932                                   [] -> pronunciationText & HTML.toHtml
 933                                   _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
 934                             "; "