src/Language/Pronunciation.hs

   1 {-# LANGUAGE OverloadedLists #-}
   2 {-# LANGUAGE OverloadedStrings #-}
   3 {-# LANGUAGE RankNTypes #-}
   4 {-# LANGUAGE StandaloneDeriving #-}
   5
   6 module Language.Pronunciation where
   7
   8 import Control.Applicative (Alternative (..))
   9 import Control.Monad.Combinators qualified as P
  10 import Control.Monad.Trans.Class qualified as MT
  11 import Control.Monad.Trans.State qualified as MT
  12 import Data.List qualified as List
  13 import Data.List.Zipper qualified as LZ
  14 import Data.Map.Strict qualified as Map
  15 import Data.Set qualified as Set
  16 import Data.Text qualified as Text
  17 import Data.Text.Short qualified as TextShort
  18 import Data.Traversable (traverse)
  19 import Language
  20 import Paths_worksheets qualified as Self
  21 import System.FilePath.Posix ((</>))
  22 import System.FilePath.Posix qualified as File
  23 import Text.Blaze.Html5.Attributes qualified as HA
  24 import Text.Megaparsec qualified as P
  25 import Worksheets.Utils.Char qualified as Char
  26 import Worksheets.Utils.HTML (className, classes, styles, (!))
  27 import Worksheets.Utils.HTML qualified as HTML
  28 import Worksheets.Utils.IPA qualified as IPA
  29 import Worksheets.Utils.Paper qualified as Paper
  30 import Worksheets.Utils.Prelude
  31
  32 data Pronunciation = Pronunciation
  33   { pronunciationIPABroad :: [IPA.Syllable []]
  34   , pronunciationText :: Text
  35   }
  36   deriving (Eq, Ord, Show)
  37 instance Semigroup Pronunciation where
  38   x <> y =
  39     Pronunciation
  40       { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
  41       , pronunciationText =
  42           [pronunciationText x, pronunciationText y]
  43             & List.filter (/= "")
  44             & Text.intercalate "."
  45       }
  46 instance IsList Pronunciation where
  47   type Item Pronunciation = IPA.Syllable []
  48   toList = pronunciationIPABroad
  49
  50   -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
  51   fromList l =
  52     Pronunciation
  53       { pronunciationIPABroad = ipa
  54       , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
  55       }
  56     where
  57       ipa =
  58         l
  59           & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
  60           & fromList
  61 instance IsString Pronunciation where
  62   fromString s =
  63     Pronunciation
  64       { pronunciationIPABroad = ipa
  65       , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
  66       }
  67     where
  68       ipa =
  69         s
  70           & Text.pack
  71           & IPA.parseSyllables @[]
  72           & either errorShow id
  73
  74 {-
  75 data PronunciationKey = PronunciationKey
  76   { pronunciationKeyText :: Text
  77   , pronunciationKeyPron :: Pronunciation
  78   , pronunciationKey
  79   }
  80 -}
  81
  82 newtype Pronunciations = Pronunciations
  83   { unPronunciations :: [(RuleLexemes, Pronunciation)]
  84   }
  85   deriving (Eq, Ord)
  86   deriving newtype (Show)
  87   deriving newtype (Semigroup)
  88   deriving newtype (Monoid)
  89 joinPronunciations :: Pronunciations -> Pronunciations
  90 joinPronunciations (Pronunciations ps) =
  91   Pronunciations
  92     [ input :=
  93         Pronunciation
  94           { pronunciationIPABroad = ipa
  95           , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
  96           }
  97     ]
  98   where
  99     input :: RuleLexemes
 100     ipa :: [IPA.Syllable []]
 101     (input, ipa) =
 102       ps
 103         & List.foldr
 104           ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
 105               case pronunciationIPABroad of
 106                 [] -> (inp <> suffix, IPA.Syllable [], acc)
 107                 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
 108                 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
 109                 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
 110                 _ -> errorShow pronunciationIPABroad
 111           )
 112           ( ""
 113           , IPA.Syllable []
 114           , []
 115           )
 116         & ( \(i, l, acc) ->
 117               ( i
 118               , glueSyllableToTheRight l acc
 119                   & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
 120               )
 121           )
 122     glueSyllableToTheRight ::
 123       IPA.Syllable [] ->
 124       [IPA.Syllable []] ->
 125       [IPA.Syllable []]
 126     glueSyllableToTheRight x y =
 127       case y of
 128         [] -> [x]
 129         [yL] -> [x <> yL]
 130         yL : yR -> x <> yL : yR
 131 instance IsList Pronunciations where
 132   type Item Pronunciations = (RuleLexemes, Pronunciation)
 133   toList = unPronunciations
 134   fromList l = Pronunciations{unPronunciations = l & fromList}
 135
 136 {-
 137 instance IsString Pronunciations where
 138   fromString = \case
 139     "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
 140     s -> Pronunciations (s & Text.pack) $
 141           fromString s
 142             & IPA.parseSyllables @[]
 143             & either errorShow id
 144 -}
 145 data ExampleLiteral = ExampleLiteral
 146   { exampleLiteralText :: ShortText
 147   , exampleLiteralTags :: Set LiteralTag
 148   , exampleLiteralMeaning :: ShortText
 149   }
 150   deriving (Eq, Ord, Show)
 151 instance IsString ExampleLiteral where
 152   fromString s =
 153     ExampleLiteral
 154       { exampleLiteralText = s & fromString
 155       , exampleLiteralTags = Set.empty
 156       , exampleLiteralMeaning = ""
 157       }
 158 data LiteralTag
 159   = LiteralTagOccurence
 160   | LiteralTagMeta
 161   | LiteralTagSilent
 162   deriving (Eq, Ord, Show)
 163 exampleLiteralsText :: [ExampleLiteral] -> ShortText
 164 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
 165
 166 hyphen =
 167   ExampleLiteral
 168     { exampleLiteralText = "-"
 169     , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
 170     , exampleLiteralMeaning = ""
 171     }
 172 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
 173 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
 174
 175 type SyllableText = ShortText
 176 type SyllableBroad = IPA.Syllable []
 177 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
 178
 179 data Adjacent
 180   = AdjacentBorder
 181   | AdjacentVowel
 182   | AdjacentConsonant
 183   deriving (Eq, Ord, Show)
 184 data Variant
 185   = VariantDefinition Text
 186   | VariantStress
 187   deriving (Eq, Ord, Show)
 188 data Rule = Rule
 189   { -- , ruleStress :: Bool
 190     -- , ruleDefinition :: Maybe Text
 191
 192     -- [ "e"    := ["ɛ"] , "x"    := ["g","z"] , "er"   := ["ɛʁ"] , "cice" := ["sis"] ]
 193     -- [ "exercice" := ["ɛg.zɛʁ.sis"]
 194     -- ]
 195     rulePron :: Pronunciations
 196   , ruleExamples :: Map InputLexemes Pronunciation
 197   }
 198   deriving (Eq, Ord, Show)
 199
 200 rule =
 201   Rule
 202     { rulePron = Pronunciations{unPronunciations = []}
 203     , ruleExamples = mempty
 204     }
 205 space = rule
 206 word
 207   , begining
 208   , ending ::
 209     RuleLexemes -> RuleLexemes
 210 word = begining >>> ending
 211 begining = after [LexemeBorder]
 212 ending = before [LexemeBorder]
 213 before ls r = RuleLexemes (unRuleLexemes r <> ls)
 214 after ls r = RuleLexemes (ls <> unRuleLexemes r)
 215 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
 216
 217 type Table = Map RuleLexemes Rule
 218 examples :: Table -> Map InputLexemes Pronunciation
 219 examples tbl =
 220   [ v & ruleExamples
 221   | v <- tbl & Map.elems
 222   ]
 223     & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
 224
 225 data Pron = Pron
 226   { pronInput :: [Lexeme]
 227   , pronRule :: Rule
 228   }
 229   deriving (Eq, Show)
 230
 231 data Syl = Syl
 232   { sylText :: Text
 233   , sylDependsOnBefore :: Bool
 234   , sylDependsOnAfter :: Bool
 235   , sylDependsOnMeaning :: Bool
 236   , sylSound :: Text -- [IPA.Syllable []]
 237   , sylIndex :: Int
 238   , sylSilent :: Bool
 239   , sylSplit :: Bool
 240   }
 241   deriving (Eq, Show)
 242
 243 addIndexes :: [[Either Char Pron]] -> [[Syl]]
 244 addIndexes = go 0
 245   where
 246     go _idx [] = []
 247     go idx (prons : next) = List.reverse prons' : go idx' next
 248       where
 249         (idx', prons') =
 250           prons
 251             & List.foldl'
 252               ( \(i, is) -> \case
 253                   Left c ->
 254                     ( i
 255                     , Syl
 256                         { sylText = Text.singleton c
 257                         , sylDependsOnAfter = False
 258                         , sylDependsOnBefore = False
 259                         , sylDependsOnMeaning = False
 260                         , sylSound = []
 261                         , sylIndex = i
 262                         , sylSilent = True
 263                         , sylSplit = False
 264                         }
 265                         : is
 266                     )
 267                   Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
 268                     ps
 269                       & List.foldl'
 270                         ( \(j, js) (t, Pronunciation{..}) ->
 271                             let sylText = t & unRuleLexemes & lexemesChars & Text.pack
 272                             in case pronunciationIPABroad of
 273                                  []
 274                                    | not (Text.null pronunciationText) ->
 275                                        ( j + 1
 276                                        , Syl
 277                                            { sylText
 278                                            , sylSound = pronunciationText
 279                                            , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 280                                            , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 281                                            , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 282                                            , sylIndex = j + 1
 283                                            , sylSilent = False
 284                                            , sylSplit = False
 285                                            }
 286                                            : js
 287                                        )
 288                                  [IPA.Syllable []]
 289                                    | Text.null pronunciationText ->
 290                                        ( j
 291                                        , case js of
 292                                            [] ->
 293                                              Syl
 294                                                { sylText
 295                                                , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 296                                                , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 297                                                , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 298                                                , sylSound = ""
 299                                                , sylIndex = j
 300                                                , sylSilent = True
 301                                                , sylSplit = False
 302                                                }
 303                                                : js
 304                                            j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
 305                                        )
 306                                  syls
 307                                    | (syls & all IPA.isSilent) && Text.null pronunciationText ->
 308                                        ( j
 309                                        , Syl
 310                                            { sylText
 311                                            , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 312                                            , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 313                                            , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 314                                            , sylSound = ""
 315                                            , sylIndex = j
 316                                            , sylSilent = True
 317                                            , sylSplit = False
 318                                            }
 319                                            : js
 320                                        )
 321                                  _syls@[_] ->
 322                                    ( j + 1
 323                                    , Syl
 324                                        { sylText
 325                                        , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 326                                        , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 327                                        , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 328                                        , sylSound = pronunciationText
 329                                        , sylIndex = j + 1
 330                                        , sylSilent = False
 331                                        , sylSplit = False
 332                                        }
 333                                        : js
 334                                    )
 335                                  _syls@[_, _] ->
 336                                    ( j
 337                                    , Syl
 338                                        { sylText
 339                                        , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
 340                                        , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
 341                                        , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
 342                                        , sylSound = pronunciationText
 343                                        , sylIndex = j
 344                                        , sylSilent = False
 345                                        , sylSplit = True
 346                                        }
 347                                        : js
 348                                    )
 349                                  syls -> errorShow syls
 350                         )
 351                         (i, is)
 352                     where
 353                       sylDependsOnAfter =
 354                         List.reverse >>> \case
 355                           LexemeBorder : _ -> True
 356                           LexemeSilent : _ -> True
 357                           LexemeConsonant : _ -> True
 358                           LexemeDoubleConsonant : _ -> True
 359                           LexemeVowel : _ -> True
 360                           LexemeSemiVowel : _ -> True
 361                           _ -> False
 362                       sylDependsOnBefore =
 363                         \case
 364                           LexemeBorder : _ -> True
 365                           LexemeSilent : _ -> True
 366                           LexemeConsonant : _ -> True
 367                           LexemeDoubleConsonant : _ -> True
 368                           LexemeVowel : _ -> True
 369                           LexemeSemiVowel : _ -> True
 370                           _ -> False
 371                       sylDependsOnMeaning =
 372                         List.reverse >>> \case
 373                           LexemeMeaning{} : _ -> True
 374                           _ -> False
 375               )
 376               (idx, [])
 377
 378 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
 379 withCapital =
 380   foldMap \(RuleLexemes pat, rul) ->
 381     [ (RuleLexemes pat, rul)
 382     ,
 383       ( RuleLexemes (withCapitalLexemes pat)
 384       , rul
 385           { rulePron = rul & rulePron & withCapitalPronunciations
 386           , ruleExamples =
 387               rul
 388                 & ruleExamples
 389                 & Map.mapKeys
 390                   ( unInputLexemes
 391                       >>> withCapitalLexemes
 392                       >>> InputLexemes
 393                   )
 394           }
 395       )
 396     ]
 397   where
 398     withCapitalPronunciations (Pronunciations []) = Pronunciations []
 399     withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
 400       Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
 401     withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
 402     withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
 403     withCapitalLexemes [] = []
 404
 405 lexemesChars :: [Lexeme] -> [Char]
 406 lexemesChars p =
 407   p & foldMap \case
 408     LexemeChar c -> [c]
 409     _ -> []
 410
 411 run ::
 412   Table ->
 413   Text ->
 414   Either
 415     ( Either
 416         (P.ParseErrorBundle Text ())
 417         (P.ParseErrorBundle [Lexeme] ())
 418     )
 419     [Either Char Pron]
 420 run rules inp =
 421   inp
 422     & runLexer
 423     & either (Left . Left) \lexs ->
 424       lexs
 425         & runParser rules
 426         & either (Left . Right) Right
 427
 428 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
 429 runParser tbl inp = inp & P.runParser (parser tbl) "input"
 430
 431 parseLiterals ::
 432   Table ->
 433   [ExampleLiteral] ->
 434   Either
 435     ( Either
 436         (P.ParseErrorBundle Text ())
 437         (P.ParseErrorBundle [Lexeme] ())
 438     )
 439     [Either Char Pron]
 440 parseLiterals rules inp =
 441   inp
 442     & traverse
 443       ( \ExampleLiteral{..} ->
 444           exampleLiteralText
 445             & TextShort.toText
 446             & runLexer
 447             <&> ( <>
 448                     [ LexemeMeaning exampleLiteralMeaning
 449                     | exampleLiteralMeaning & TextShort.null & not
 450                     ]
 451                 )
 452       )
 453     & either (Left . Left) \lexs ->
 454       lexs
 455         & mconcat
 456         & runParser rules
 457         & either (Left . Right) Right
 458
 459 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
 460 parser tbl = do
 461   res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
 462   P.eof
 463   return $ res & catMaybes
 464   where
 465     -- Match one of the rules, trying longuest first
 466     parseRules :: P.Parsec () [Lexeme] Pron
 467     parseRules =
 468       P.choice
 469         [ parseRule r
 470         | r <- tbl & Map.toDescList
 471         ]
 472     {-
 473     <|> P.choice
 474       [ parseRule
 475         ( RuleLexemes $
 476             rulePat & unRuleLexemes <&> \case
 477               LexemeChar c -> LexemeChar (c & Char.toUpper)
 478               x -> x
 479         , curRule
 480         )
 481       | (rulePat, curRule) <- tbl & Map.toDescList
 482       ]
 483     -}
 484     parseRule (rulePat, curRule@Rule{..}) =
 485       P.try do
 486         let
 487           pat = rulePat & unRuleLexemes
 488           patSep = (`List.elem` list [LexemeVowel, LexemeSemiVowel, LexemeConsonant, LexemeSilent])
 489           -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
 490           patBegin = pat & List.dropWhileEnd patSep
 491           patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
 492         -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
 493         P.chunk patBegin
 494         inpAfterBegin <- P.getInput
 495         unless (List.null patEnd) do
 496           inpWithAhead <- parseAhead
 497           -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
 498           P.setInput inpWithAhead
 499           P.chunk patEnd & void
 500         -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
 501         let lastSound =
 502               rulePron
 503                 & unPronunciations
 504                 & List.reverse
 505                 & headMaybe
 506                 & maybe
 507                   []
 508                   ( snd
 509                       >>> pronunciationIPABroad
 510                       >>> List.reverse
 511                       >>> headMaybe
 512                       >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
 513                   )
 514         P.setInput $ lastSound <> inpAfterBegin
 515         return Pron{pronInput = pat, pronRule = curRule}
 516     parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
 517     parseChar =
 518       P.anySingle <&> \case
 519         LexemeChar c -> Just $ Left c
 520         _ -> Nothing
 521     parseAhead :: P.Parsec () [Lexeme] [Lexeme]
 522     parseAhead = do
 523       nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
 524       -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
 525       case nextStep of
 526         Right (Right Pron{pronInput, pronRule}) -> do
 527           let x =
 528                 pronRule
 529                   & rulePron
 530                   & unPronunciations
 531                   & headMaybe
 532                   & maybe
 533                     []
 534                     ( snd
 535                         >>> pronunciationIPABroad
 536                         >>> headMaybe
 537                         >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
 538                     )
 539           inp <- P.getInput
 540           return $ x <> pronInput <> inp
 541         Right (Left lex) -> do
 542           parseAhead <&> (lex :)
 543         Left{} -> P.getInput
 544     lexemeHeadSound :: [_] -> [Lexeme]
 545     lexemeHeadSound =
 546       headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
 547         Just IPA.Zero{} -> [LexemeSilent]
 548         Just IPA.Vowel{} -> [LexemeVowel]
 549         Just (IPA.Consonant consonant) -> do
 550           case consonant of
 551             IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
 552             IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
 553             _ -> [LexemeConsonant]
 554         _ -> [] -- error
 555
 556 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
 557 runLexer inp = inp & P.runParser lexer "input"
 558
 559 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
 560 exampleLiteralsLexemes ls =
 561   ls & foldMap \ExampleLiteral{..} ->
 562     unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
 563       <> [ LexemeMeaning exampleLiteralMeaning
 564          ]
 565
 566 lexer :: P.Parsec () Text [Lexeme]
 567 lexer = do
 568   lls <- P.many do
 569     P.choice $
 570       list
 571         [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
 572             return [LexemeChar c | c <- cs & Text.unpack]
 573         , do
 574             cs <- P.takeWhile1P Nothing Char.isLetter
 575             mean <- (<|> return []) $ P.try do
 576               P.single '{'
 577               m <- P.takeWhile1P Nothing (/= '}')
 578               P.single '}'
 579               return [LexemeMeaning (TextShort.fromText m)]
 580             return $
 581               LexemeBorder
 582                 : [LexemeChar c | c <- cs & Text.unpack]
 583                   <> mean
 584                   <> [LexemeBorder]
 585         , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
 586             return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
 587         , P.satisfy Char.isSymbol >>= \c ->
 588             return [LexemeChar c]
 589         , P.satisfy Char.isSeparator >>= \c ->
 590             return [LexemeChar c]
 591         , P.satisfy Char.isMark >>= \c ->
 592             return [LexemeChar c]
 593         , P.satisfy Char.isPunctuation >>= \c ->
 594             return [LexemeChar c]
 595         ]
 596   P.eof
 597   return $ mconcat lls
 598
 599 words :: [Either Char Pron] -> [[Either Char Pron]]
 600 words [] = []
 601 words prons = word0 : words next
 602   where
 603     (word0, rest) = prons & List.span (isSep >>> not)
 604     (_sep, next) = rest & List.span isSep
 605     isSep = \case
 606       Left c | c & Char.isSpace -> True
 607       _ -> False
 608
 609 {-
 610 case statePats st & Map.lookup k of
 611   Nothing -> st
 612   Just (PatTree pats) ->
 613     loop st {statePats = pats, stateBuffer = k : stateBuffer st}
 614   Just (PatEnd end) ->
 615     loop st { statePats = initPats
 616        , stateBuffer = []
 617        , stateInput = stateInput st
 618            & LZ.insert Inp
 619               { inpPats = stateBuffer st & List.reverse
 620               , inpPronunciations = end
 621               }
 622            & LZ.right
 623        }
 624 -}
 625
 626 {-
 627 parse :: PatTree -> Text -> [Inp]
 628 parse initPats input =
 629   let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
 630   runInp [] initPats inpZip & LZ.toList
 631   where
 632   charToInp :: Char -> Inp
 633   charToInp c = Inp
 634     { inpPats = [PosNext (PatternChar c)]
 635     , inpPronunciations = []
 636     }
 637   runInp :: [Pos] -> PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
 638   runInp oks pat inp =
 639     traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
 640     case pat of
 641       PatEnd end ->
 642         -- the pattern ends
 643         inp
 644         & LZ.insert Inp
 645           { inpPats = oks & List.reverse
 646           , inpPronunciations = end
 647           }
 648         & LZ.right
 649         & runInp [] initPats
 650       PatTree pats ->
 651         -- the pattern may go on
 652         case inp & LZ.safeCursor of
 653           Nothing ->
 654             inp
 655               & runPat [] oks [PosNext PatternLexicalBorder] pats
 656           Just cur ->
 657             inp & LZ.delete
 658                 & runPat [] oks (inpPats cur & List.sort) pats
 659
 660   runPat :: [Pos] -> [Pos] -> [Pos] -> Map Pos PatTree -> LZ.Zipper Inp -> LZ.Zipper Inp
 661   runPat kos oks todos pats inp =
 662     traceShow ( "runPat"::Text
 663               , ("kos"::Text) := kos
 664               , ("oks"::Text) := oks
 665               , ("todos"::Text) := todos
 666               , ("cur"::Text) :=LZ.safeCursor inp
 667               ) $
 668     case todos of
 669       [] | LZ.endp inp -> inp
 670         & runInp kos (PatEnd [])
 671         & runInp oks (PatEnd [])
 672       [] ->
 673         -- nothing left to advance the pattern
 674         --traceShow ("runPat/[]"::Text) $
 675           inp
 676             & (if null kos then id else runInp kos (PatEnd []))
 677             & runInp oks (PatTree pats)
 678       k:ks ->
 679         case pats & Map.lookup k of
 680           -- the pattern ends
 681           Just (PatEnd end) ->
 682             --traceShow ("runPat/End"::Text) $
 683             -- ks forgotten
 684             inp
 685               & (if null kos then id else runInp kos (PatEnd []))
 686               & runInp (k:oks) (PatEnd end)
 687           -- the pattern advances
 688           Just (PatTree nextPats) ->
 689             --traceShow ("runPat/Node"::Text) $
 690             inp & runPat kos (k:oks) ks nextPats
 691           -- the pattern does not advance
 692           Nothing ->
 693             inp & runPat (k:kos) oks ks pats
 694 -}
 695
 696 data Lexeme
 697   = LexemeBorder
 698   | LexemeVowel
 699   | LexemeSemiVowel
 700   | LexemeConsonant
 701   | LexemeDoubleConsonant
 702   | LexemeSilent
 703   | LexemeMeaning ShortText
 704   | -- | `LexemeChar` is last to have priority when using `Map.toDescList`
 705     LexemeChar Char
 706   deriving (Eq, Ord, Show)
 707
 708 -- data Sound
 709 --   = SoundVowel
 710 --   | SoundConsonant
 711 --   deriving (Eq, Ord, Show)
 712
 713 {-
 714 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
 715   deriving (Eq, Ord, Show)
 716 instance P.Stream Lexemes where
 717   type Token Lexemes = Lexeme
 718   type Tokens Lexemes = Lexemes
 719   tokensToChunk _px = Lexemes
 720   chunkToTokens _px = unLexemes
 721   chunkLength _px = unLexemes >>> List.length
 722   chunkEmpty _px = unLexemes >>> List.null
 723   take1_ = unLexemes >>> P.take1_ >>> coerce
 724   takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
 725   takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
 726
 727 instance IsString Lexemes where
 728   fromString s =
 729     s
 730       & Text.pack
 731       & runLexer
 732       & either
 733           errorShow
 734           ((`appEndo` []) >>> Lexemes)
 735 -}
 736
 737 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
 738   deriving (Eq, Ord, Show)
 739 instance HasTypeDefault RuleLexemes where
 740   typeDefault = RuleLexemes typeDefault
 741 instance Semigroup RuleLexemes where
 742   RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
 743 instance Monoid RuleLexemes where
 744   mempty = RuleLexemes mempty
 745 instance IsList RuleLexemes where
 746   type Item RuleLexemes = Lexeme
 747   fromList = RuleLexemes
 748   toList = unRuleLexemes
 749 instance IsString RuleLexemes where
 750   fromString s =
 751     s
 752       & Text.pack
 753       & runLexer
 754       & either
 755         errorShow
 756         ( List.dropWhileEnd (== LexemeBorder)
 757             >>> List.dropWhile (== LexemeBorder)
 758             >>> RuleLexemes
 759         )
 760
 761 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
 762   deriving (Eq, Ord, Show)
 763 instance HasTypeDefault InputLexemes where
 764   typeDefault = InputLexemes typeDefault
 765 instance Semigroup InputLexemes where
 766   InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
 767 instance Monoid InputLexemes where
 768   mempty = InputLexemes mempty
 769 instance IsList InputLexemes where
 770   type Item InputLexemes = Lexeme
 771   fromList = InputLexemes
 772   toList = unInputLexemes
 773 instance IsString InputLexemes where
 774   fromString s =
 775     s
 776       & Text.pack
 777       & runLexer
 778       & either errorShow InputLexemes
 779
 780 instance P.ShowErrorComponent () where
 781   showErrorComponent = show
 782   errorComponentLen _ = 2
 783 instance P.VisualStream [Lexeme] where
 784   showTokens _s = show
 785   tokensLength _s xs = xs <&> (show >>> List.length) & sum
 786 instance P.TraversableStream [Lexeme] where
 787   reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
 788
 789 data LexemeTag
 790   = LexemeTagLetter
 791   | LexemeTagSpace
 792   | LexemeTagPunctuation
 793   | LexemeTagSeparator
 794   | LexemeTagMark
 795   | LexemeTagSymbol
 796   | LexemeTagDefinition
 797   | LexemeTagBorder
 798   deriving (Eq, Ord, Show)
 799 deriving instance Ord (IPA.Syllable [])
 800 deriving instance Ord IPA.SuprasegmentalFeature
 801 deriving instance Ord IPA.SegmentalFeature
 802 deriving instance Ord IPA.Sibilance
 803 deriving instance Ord IPA.Manner
 804 deriving instance Ord IPA.Phonation
 805 deriving instance Ord IPA.Roundedness
 806 deriving instance Ord IPA.Height
 807 deriving instance Ord IPA.Vowel
 808 deriving instance Ord IPA.Consonant
 809 deriving instance Ord IPA.Segment
 810
 811 {-
 812 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
 813 tableToMatch tbl = loop
 814   where
 815   loop prevBorder = \case
 816     InputText inp ->
 817       [ (matchingLength, )
 818       | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
 819       , let matchingLength = transMatchingLength input trans
 820       , 0 < matchingLength || not (isTransConsume trans)
 821       , let (inputRead, inputRest) = input & Text.splitAt matchingLength
 822       ]
 823         & Map.fromListWith (\new old -> old)
 824         & Map.lookupMax
 825         <&> snd
 826
 827 -}
 828 tableHtml :: Table -> IO HTML.Html
 829 tableHtml tbl = do
 830   dataPath <- Self.getDataDir <&> File.normalise
 831   let title :: String = "LexerDict"
 832   let pageOrientation = Paper.PageOrientationPortrait
 833   let pageSize = Paper.PageSizeA4
 834   let partLangue = LangueFrançais
 835   return do
 836     HTML.docTypeHtml do
 837       HTML.head do
 838         HTML.title $ title & HTML.toHtml
 839         forM_
 840           ( [ "styles/Paper.css"
 841             , "styles/French/Lexer.css"
 842             , "styles/Rosetta/Reading.css"
 843             ]
 844               & list
 845           )
 846           \cssFile ->
 847             HTML.link
 848               ! HA.rel "stylesheet"
 849               ! HA.type_ "text/css"
 850               ! HA.href (dataPath </> cssFile & HTML.toValue)
 851         HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
 852       -- HTML.styleCSS $ pageDifficulties & difficultyCSS
 853       HTML.body
 854         ! classes ["A4", "french-lexer"]
 855         $ do
 856           "\n"
 857           let rulesChunks = tbl & Map.toList & chunksOf 50
 858           forM_ rulesChunks \rules ->
 859             HTML.section
 860               ! classes
 861                 [ "sheet"
 862                 ]
 863               ! styles
 864                 []
 865               $ do
 866                 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
 867                   "\n"
 868                   HTML.div
 869                     ! classes
 870                       [ "dict-entry"
 871                       , if even ruleIndex then "even" else "odd"
 872                       ]
 873                     ! styles
 874                       []
 875                     $ do
 876                       "\n"
 877                       HTML.div
 878                         ! classes
 879                           [ "dict-key"
 880                           , "lang-" <> className partLangue
 881                           ]
 882                         ! styles
 883                           []
 884                         -- "grid-template-columns" :=
 885                         --  (0.5 & cm & HTML.toCSS)
 886                         --    & List.replicate lexerDictMaxKeyLength
 887                         --    & List.unwords
 888
 889                         $ do
 890                           forM_ (["model"] :: [String]) \rowKind -> do
 891                             forM_ (rulePat & unRuleLexemes) \ruleChar -> do
 892                               -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
 893                               case ruleChar of
 894                                 LexemeChar c -> do
 895                                   HTML.span
 896                                     ! classes
 897                                       [ "dict-key-cell"
 898                                       , rowKind
 899                                       -- , "script-" <> className uniScript
 900                                       ]
 901                                     $ do
 902                                       c & HTML.toHtml
 903                                 _ -> ""
 904                       HTML.div
 905                         ! classes
 906                           [ "dict-pronunciation"
 907                           ]
 908                         $ do
 909                           -- HTML.span ! classes ["arrow"] $ "→"
 910                           case rulePron of
 911                             Pronunciations
 912                               { unPronunciations =
 913                                 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
 914                               } -> ""
 915                             Pronunciations{unPronunciations = is} ->
 916                               is
 917                                 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
 918                                 & HTML.toHtml
 919                       HTML.div
 920                         ! classes
 921                           [ "dict-lexeme"
 922                           ]
 923                         $ do
 924                           -- HTML.span ! classes ["arrow"] $ "→"
 925                           forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
 926                             HTML.span
 927                               ! classes
 928                                 []
 929                               $ do
 930                                 case pronunciationIPABroad of
 931                                   [] -> pronunciationText & HTML.toHtml
 932                                   _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
 933                             "; "