[julm/worksheets.git] / src/Language/Pronunciation.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE RankNTypes #-}
4 {-# LANGUAGE StandaloneDeriving #-}
5
6 module Language.Pronunciation where
7
8 import Control.Applicative (Alternative (..))
9 import Control.Monad.Combinators qualified as P
10 import Control.Monad.Trans.Class qualified as MT
11 import Control.Monad.Trans.State qualified as MT
12 import Data.List qualified as List
13 import Data.List.Zipper qualified as LZ
14 import Data.Map.Strict qualified as Map
15 import Data.Set qualified as Set
16 import Data.Text qualified as Text
17 import Data.Text.Short qualified as TextShort
18 import Data.Traversable (traverse)
19 import Language
20 import Paths_worksheets qualified as Self
21 import System.FilePath.Posix ((</>))
22 import System.FilePath.Posix qualified as File
23 import Text.Blaze.Html5.Attributes qualified as HA
24 import Text.Megaparsec qualified as P
25 import Worksheets.Utils.Char qualified as Char
26 import Worksheets.Utils.HTML (className, classes, styles, (!))
27 import Worksheets.Utils.HTML qualified as HTML
28 import Worksheets.Utils.IPA qualified as IPA
29 import Worksheets.Utils.Paper qualified as Paper
30 import Worksheets.Utils.Prelude
31
32 data Pronunciation = Pronunciation
33 { pronunciationIPABroad :: [IPA.Syllable []]
34 , pronunciationText :: Text
35 }
36 deriving (Eq, Ord, Show)
37 instance Semigroup Pronunciation where
38 x <> y =
39 Pronunciation
40 { pronunciationIPABroad = pronunciationIPABroad x <> pronunciationIPABroad y
41 , pronunciationText =
42 [pronunciationText x, pronunciationText y]
43 & List.filter (/= "")
44 & Text.intercalate "."
45 }
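-- A rough illustration of the append above (the IPA literals are only
-- examples, relying on the IsString instance further down): the broad
-- syllables are concatenated and the rendered texts joined with a ".",
-- so ("ɛg" <> "zɛʁ") should carry pronunciationText "ɛg.zɛʁ".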
46 instance IsList Pronunciation where
47 type Item Pronunciation = IPA.Syllable []
48 toList = pronunciationIPABroad
49
50 -- fromList :: HasCallStack => [Item Pronunciation] -> Pronunciation
51 fromList l =
52 Pronunciation
53 { pronunciationIPABroad = ipa
54 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
55 }
56 where
57 ipa =
58 l
59 & mapButLast (IPA.WithSuprasegmentalFeature IPA.Break)
60 & fromList
61 instance IsString Pronunciation where
62 fromString s =
63 Pronunciation
64 { pronunciationIPABroad = ipa
65 , pronunciationText = ipa & foldMap (IPA.toIPA_ >>> IPA.unIPA)
66 }
67 where
68 ipa =
69 s
70 & Text.pack
71 & IPA.parseSyllables @[]
72 & either errorShow id
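-- With OverloadedStrings this lets a Pronunciation be written directly as
-- an IPA string literal, e.g. "ɛg.zɛʁ.sis" (illustrative; the accepted
-- syntax is whatever IPA.parseSyllables supports), failing via errorShow
-- on a parse error.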
73
74 {-
75 data PronunciationKey = PronunciationKey
76 { pronunciationKeyText :: Text
77 , pronunciationKeyPron :: Pronunciation
78 , pronunciationKey
79 }
80 -}
81
82 newtype Pronunciations = Pronunciations
83 { unPronunciations :: [(RuleLexemes, Pronunciation)]
84 }
85 deriving (Eq, Ord)
86 deriving newtype (Show)
87 deriving newtype (Semigroup)
88 deriving newtype (Monoid)
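-- | Merge per-lexeme pronunciations into a single one: the rule inputs
-- are concatenated and their broad IPA is re-syllabified by the fold
-- below, gluing syllable fragments back together across rule boundaries
-- and marking every syllable but the last with an IPA.Break.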
89 joinPronunciations :: Pronunciations -> Pronunciations
90 joinPronunciations (Pronunciations ps) =
91 Pronunciations
92 [ input :=
93 Pronunciation
94 { pronunciationIPABroad = ipa
95 , pronunciationText = ipa & foldMap (IPA.toIPA >>> maybe "" IPA.unIPA)
96 }
97 ]
98 where
99 input :: RuleLexemes
100 ipa :: [IPA.Syllable []]
101 (input, ipa) =
102 ps
103 & List.foldr
104 ( \(inp, Pronunciation{pronunciationIPABroad}) (suffix, l, acc) ->
105 case pronunciationIPABroad of
106 [] -> (inp <> suffix, IPA.Syllable [], acc)
107 [syl@(IPA.Syllable [])] -> (inp <> suffix, syl, acc)
108 [syl] -> (inp <> suffix, IPA.Syllable [], (syl <> l) : acc)
109 [sylL, sylR] -> (inp <> suffix, sylL, glueSyllableToTheRight sylR acc)
110 _ -> errorShow pronunciationIPABroad
111 )
112 ( ""
113 , IPA.Syllable []
114 , []
115 )
116 & ( \(i, l, acc) ->
117 ( i
118 , glueSyllableToTheRight l acc
119 & mapButLast (IPA.setSuprasegmentalFeatures [IPA.Break])
120 )
121 )
122 glueSyllableToTheRight ::
123 IPA.Syllable [] ->
124 [IPA.Syllable []] ->
125 [IPA.Syllable []]
126 glueSyllableToTheRight x y =
127 case y of
128 [] -> [x]
129 [yL] -> [x <> yL]
130 yL : yR -> x <> yL : yR
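-- Schematically (derivable from the cases above):
--   glueSyllableToTheRight s []       == [s]
--   glueSyllableToTheRight s (y : ys) == (s <> y) : ys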
131 instance IsList Pronunciations where
132 type Item Pronunciations = (RuleLexemes, Pronunciation)
133 toList = unPronunciations
134 fromList l = Pronunciations{unPronunciations = l & fromList}
135
136 {-
137 instance IsString Pronunciations where
138 fromString = \case
139 "" -> Pronunciations "" [IPA.Syllable [IPA.Zero]]
140 s -> Pronunciations (s & Text.pack) $
141 fromString s
142 & IPA.parseSyllables @[]
143 & either errorShow id
144 -}
145 data ExampleLiteral = ExampleLiteral
146 { exampleLiteralText :: ShortText
147 , exampleLiteralTags :: Set LiteralTag
148 , exampleLiteralMeaning :: ShortText
149 }
150 deriving (Eq, Ord, Show)
151 instance IsString ExampleLiteral where
152 fromString s =
153 ExampleLiteral
154 { exampleLiteralText = s & fromString
155 , exampleLiteralTags = Set.empty
156 , exampleLiteralMeaning = ""
157 }
158 data LiteralTag
159 = LiteralTagOccurence
160 | LiteralTagMeta
161 | LiteralTagSilent
162 deriving (Eq, Ord, Show)
163 exampleLiteralsText :: [ExampleLiteral] -> ShortText
164 exampleLiteralsText ls = ls <&> exampleLiteralText & mconcat
165
166 hyphen =
167 ExampleLiteral
168 { exampleLiteralText = "-"
169 , exampleLiteralTags = [LiteralTagMeta] & Set.fromList
170 , exampleLiteralMeaning = ""
171 }
172 occurence lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagOccurence}
173 silent lit = lit{exampleLiteralTags = lit & exampleLiteralTags & Set.insert LiteralTagSilent}
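-- Illustrative use of the helpers above: silent (occurence "e") tags the
-- literal "e" both as the occurrence the rule is about and as a silent
-- letter.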
174
175 type SyllableText = ShortText
176 type SyllableBroad = IPA.Syllable []
177 type SyllablesTable = Map SyllableText (Map SyllableText [([ExampleLiteral], SyllableBroad)])
178
179 data Adjacent
180 = AdjacentBorder
181 | AdjacentVowel
182 | AdjacentConsonant
183 deriving (Eq, Ord, Show)
184 data Variant
185 = VariantDefinition Text
186 | VariantStress
187 deriving (Eq, Ord, Show)
188 data Rule = Rule
189 { -- , ruleStress :: Bool
190 -- , ruleDefinition :: Maybe Text
191
192 -- [ "e" := ["ɛ"] , "x" := ["g","z"] , "er" := ["ɛʁ"] , "cice" := ["sis"] ]
193 -- [ "exercice" := ["ɛg.zɛʁ.sis"]
194 -- ]
195 rulePron :: Pronunciations
196 , ruleExamples :: Map InputLexemes Pronunciation
197 }
198 deriving (Eq, Ord, Show)
199
200 rule =
201 Rule
202 { rulePron = Pronunciations{unPronunciations = []}
203 , ruleExamples = mempty
204 }
205 space = rule
206 word
207 , begining
208 , ending ::
209 RuleLexemes -> RuleLexemes
210 word = begining >>> ending
211 begining = after [LexemeBorder]
212 ending = before [LexemeBorder]
213 before ls r = RuleLexemes (unRuleLexemes r <> ls)
214 after ls r = RuleLexemes (ls <> unRuleLexemes r)
215 meaning r d = RuleLexemes (unRuleLexemes r <> [LexemeMeaning d])
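-- A sketch of how these combinators compose (the pattern string is an
-- example only): word "et" anchors the pattern at both word borders,
-- i.e. roughly RuleLexemes [LexemeBorder, LexemeChar 'e', LexemeChar 't',
-- LexemeBorder], while begining "et" anchors it only at the start.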
216
217 type Table = Map RuleLexemes Rule
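-- | Merge the examples of every rule, failing loudly (via errorShow) if
-- the same input was given two different pronunciations.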
218 examples :: Table -> Map InputLexemes Pronunciation
219 examples tbl =
220 [ v & ruleExamples
221 | v <- tbl & Map.elems
222 ]
223 & Map.unionsWith (\new old -> if new == old then new else errorShow (new, old))
224
225 data Pron = Pron
226 { pronInput :: [Lexeme]
227 , pronRule :: Rule
228 }
229 deriving (Eq, Show)
230
231 data Syl = Syl
232 { sylText :: Text
233 , sylDependsOnBefore :: Bool
234 , sylDependsOnAfter :: Bool
235 , sylDependsOnMeaning :: Bool
236 , sylSound :: Text -- [IPA.Syllable []]
237 , sylIndex :: Int
238 , sylSilent :: Bool
239 , sylSplit :: Bool
240 }
241
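-- | Flatten parsed words into display syllables: plain characters become
-- silent Syls, each rule pronunciation becomes a Syl that is silent, a
-- single sounded syllable, or split over two syllables, and sylIndex
-- advances only on sounded syllables, continuing across words.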
242 addIndexes :: [[Either Char Pron]] -> [[Syl]]
243 addIndexes = go 0
244 where
245 go _idx [] = []
246 go idx (prons : next) = List.reverse prons' : go idx' next
247 where
248 (idx', prons') =
249 prons
250 & List.foldl'
251 ( \(i, is) -> \case
252 Left c ->
253 ( i
254 , Syl
255 { sylText = Text.singleton c
256 , sylDependsOnAfter = False
257 , sylDependsOnBefore = False
258 , sylDependsOnMeaning = False
259 , sylSound = []
260 , sylIndex = i
261 , sylSilent = True
262 , sylSplit = False
263 }
264 : is
265 )
266 Right Pron{pronRule = Rule{rulePron = Pronunciations{unPronunciations = ps}}} ->
267 ps
268 & List.foldl'
269 ( \(j, js) (t, Pronunciation{..}) ->
270 let sylText = t & unRuleLexemes & lexemesChars & Text.pack
271 in case pronunciationIPABroad of
272 []
273 | not (Text.null pronunciationText) ->
274 ( j + 1
275 , Syl
276 { sylText
277 , sylSound = pronunciationText
278 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
279 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
280 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
281 , sylIndex = j + 1
282 , sylSilent = False
283 , sylSplit = False
284 }
285 : js
286 )
287 [IPA.Syllable []]
288 | Text.null pronunciationText ->
289 ( j
290 , case js of
291 [] ->
292 Syl
293 { sylText
294 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
295 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
296 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
297 , sylSound = ""
298 , sylIndex = j
299 , sylSilent = True
300 , sylSplit = False
301 }
302 : js
303 j0@Syl{sylText = j0t} : jss -> j0{sylText = j0t <> sylText} : jss
304 )
305 syls
306 | (syls & all IPA.isSilent) && Text.null pronunciationText ->
307 ( j
308 , Syl
309 { sylText
310 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
311 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
312 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
313 , sylSound = ""
314 , sylIndex = j
315 , sylSilent = True
316 , sylSplit = False
317 }
318 : js
319 )
320 _syls@[_] ->
321 ( j + 1
322 , Syl
323 { sylText
324 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
325 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
326 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
327 , sylSound = pronunciationText
328 , sylIndex = j + 1
329 , sylSilent = False
330 , sylSplit = False
331 }
332 : js
333 )
334 _syls@[_, _] ->
335 ( j
336 , Syl
337 { sylText
338 , sylDependsOnBefore = t & unRuleLexemes & sylDependsOnBefore
339 , sylDependsOnAfter = t & unRuleLexemes & sylDependsOnAfter
340 , sylDependsOnMeaning = t & unRuleLexemes & sylDependsOnMeaning
341 , sylSound = pronunciationText
342 , sylIndex = j
343 , sylSilent = False
344 , sylSplit = True
345 }
346 : js
347 )
348 syls -> errorShow syls
349 )
350 (i, is)
351 where
352 sylDependsOnAfter =
353 List.reverse >>> \case
354 LexemeBorder : _ -> True
355 LexemeSilent : _ -> True
356 LexemeConsonant : _ -> True
357 LexemeDoubleConsonant : _ -> True
358 LexemeVowel : _ -> True
359 LexemeSemiVowel : _ -> True
360 _ -> False
361 sylDependsOnBefore =
362 \case
363 LexemeBorder : _ -> True
364 LexemeSilent : _ -> True
365 LexemeConsonant : _ -> True
366 LexemeDoubleConsonant : _ -> True
367 LexemeVowel : _ -> True
368 LexemeSemiVowel : _ -> True
369 _ -> False
370 sylDependsOnMeaning =
371 List.reverse >>> \case
372 LexemeMeaning{} : _ -> True
373 _ -> False
374 )
375 (idx, [])
376
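-- | For every rule, also emit a capitalised variant: the first LexemeChar
-- of the pattern (and of its pronunciation and example keys) is
-- upper-cased, presumably so that rules still match at the start of a
-- sentence.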
377 withCapital :: [(RuleLexemes, Rule)] -> [(RuleLexemes, Rule)]
378 withCapital =
379 foldMap \(RuleLexemes pat, rul) ->
380 [ (RuleLexemes pat, rul)
381 ,
382 ( RuleLexemes (withCapitalLexemes pat)
383 , rul
384 { rulePron = rul & rulePron & withCapitalPronunciations
385 , ruleExamples =
386 rul
387 & ruleExamples
388 & Map.mapKeys
389 ( unInputLexemes
390 >>> withCapitalLexemes
391 >>> InputLexemes
392 )
393 }
394 )
395 ]
396 where
397 withCapitalPronunciations (Pronunciations []) = Pronunciations []
398 withCapitalPronunciations (Pronunciations ((t, p) : ps)) =
399 Pronunciations ((RuleLexemes $ withCapitalLexemes $ unRuleLexemes t, p) : ps)
400 withCapitalLexemes (LexemeChar x : xs) = LexemeChar (Char.toUpper x) : xs
401 withCapitalLexemes (x : xs) = x : withCapitalLexemes xs
402 withCapitalLexemes [] = []
403
404 lexemesChars :: [Lexeme] -> [Char]
405 lexemesChars p =
406 p & foldMap \case
407 LexemeChar c -> [c]
408 _ -> []
409
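-- | Lex then parse some text against a rule table.  Intended usage
-- (sketch, where frenchRules is a hypothetical Table):
--   run frenchRules "exercice"
-- yields either a lexer error (Left Left), a parser error (Left Right),
-- or the recognised mix of bare characters and rule applications.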
410 run ::
411 Table ->
412 Text ->
413 Either
414 ( Either
415 (P.ParseErrorBundle Text ())
416 (P.ParseErrorBundle [Lexeme] ())
417 )
418 [Either Char Pron]
419 run rules inp =
420 inp
421 & runLexer
422 & either (Left . Left) \lexs ->
423 lexs
424 & runParser rules
425 & either (Left . Right) Right
426
427 runParser :: Table -> [Lexeme] -> Either (P.ParseErrorBundle [Lexeme] ()) [Either Char Pron]
428 runParser tbl inp = inp & P.runParser (parser tbl) "input"
429
430 parseLiterals ::
431 Table ->
432 [ExampleLiteral] ->
433 Either
434 ( Either
435 (P.ParseErrorBundle Text ())
436 (P.ParseErrorBundle [Lexeme] ())
437 )
438 [Either Char Pron]
439 parseLiterals rules inp =
440 inp
441 & traverse
442 ( \ExampleLiteral{..} ->
443 exampleLiteralText
444 & TextShort.toText
445 & runLexer
446 <&> ( <>
447 [ LexemeMeaning exampleLiteralMeaning
448 | exampleLiteralMeaning & TextShort.null & not
449 ]
450 )
451 )
452 & either (Left . Left) \lexs ->
453 lexs
454 & mconcat
455 & runParser rules
456 & either (Left . Right) Right
457
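-- | Parse a lexeme stream against the table, trying rules in descending
-- key order (see the comment below).  A pattern may end in context
-- lexemes (Lexeme{Vowel,SemiVowel,Consonant,Silent}); these are checked
-- against a lookahead that parseAhead reconstructs from the sound of
-- whatever follows, and the sound of the current rule's own last
-- syllable is pushed back onto the input so the next rule can match it
-- as its preceding context.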
458 parser :: Table -> P.Parsec () [Lexeme] [Either Char Pron]
459 parser tbl = do
460 res <- P.many $ (Just . Right) <$> parseRules <|> parseChar
461 P.eof
462 return $ res & catMaybes
463 where
464 -- Match one of the rules, trying the longest first
465 parseRules :: P.Parsec () [Lexeme] Pron
466 parseRules =
467 P.choice
468 [ parseRule r
469 | r <- tbl & Map.toDescList
470 ]
471 {-
472 <|> P.choice
473 [ parseRule
474 ( RuleLexemes $
475 rulePat & unRuleLexemes <&> \case
476 LexemeChar c -> LexemeChar (c & Char.toUpper)
477 x -> x
478 , curRule
479 )
480 | (rulePat, curRule) <- tbl & Map.toDescList
481 ]
482 -}
483 parseRule (rulePat, curRule@Rule{..}) =
484 P.try do
485 let
486 pat = rulePat & unRuleLexemes
487 patSep = (`List.elem` list [LexemeVowel, LexemeSemiVowel, LexemeConsonant, LexemeSilent])
488 -- (patEnd, patBegin) = pat & List.reverse & List.span patSep
489 patBegin = pat & List.dropWhileEnd patSep
490 patEnd = pat & List.reverse & List.takeWhile patSep & List.reverse
491 -- parse without the ending Lexeme{Vowel,SemiVowel,Consonant}
492 P.chunk patBegin
493 inpAfterBegin <- P.getInput
494 unless (List.null patEnd) do
495 inpWithAhead <- parseAhead
496 -- traceShowM ("inpWithAhead"::Text, inpWithAhead)
497 P.setInput inpWithAhead
498 P.chunk patEnd & void
499 -- insert the Lexeme{Vowel,SemiVowel,Consonant} from the output of the current rule
500 let lastSound =
501 rulePron
502 & unPronunciations
503 & List.reverse
504 & headMaybe
505 & maybe
506 []
507 ( snd
508 >>> pronunciationIPABroad
509 >>> List.reverse
510 >>> headMaybe
511 >>> maybe [] (IPA.syllableToSegments >>> List.reverse >>> lexemeHeadSound)
512 )
513 P.setInput $ lastSound <> inpAfterBegin
514 return Pron{pronInput = pat, pronRule = curRule}
515 parseChar :: P.Parsec () [Lexeme] (Maybe (Either Char Pron))
516 parseChar =
517 P.anySingle <&> \case
518 LexemeChar c -> Just $ Left c
519 _ -> Nothing
520 parseAhead :: P.Parsec () [Lexeme] [Lexeme]
521 parseAhead = do
522 nextStep <- P.observing $ Right <$> parseRules <|> Left <$> P.anySingle
523 -- traceShowM ("nextStep"::Text, nextStep & either (\err -> Left ()) Right)
524 case nextStep of
525 Right (Right Pron{pronInput, pronRule}) -> do
526 let x =
527 pronRule
528 & rulePron
529 & unPronunciations
530 & headMaybe
531 & maybe
532 []
533 ( snd
534 >>> pronunciationIPABroad
535 >>> headMaybe
536 >>> maybe [] (IPA.syllableToSegments >>> lexemeHeadSound)
537 )
538 inp <- P.getInput
539 return $ x <> pronInput <> inp
540 Right (Left lex) -> do
541 parseAhead <&> (lex :)
542 Left{} -> P.getInput
543 lexemeHeadSound :: [_] -> [Lexeme]
544 lexemeHeadSound =
545 headMaybe >>> fmap IPA.dropSegmentalFeatures >>> \case
546 Just IPA.Zero{} -> [LexemeSilent]
547 Just IPA.Vowel{} -> [LexemeVowel]
548 Just (IPA.Consonant consonant) -> do
549 case consonant of
550 IPA.Pulmonic _phonation _place IPA.Approximant -> [LexemeSemiVowel]
551 IPA.Ejective _place IPA.Approximant -> [LexemeSemiVowel]
552 _ -> [LexemeConsonant]
553 _ -> [] -- error
554
555 runLexer :: Text -> Either (P.ParseErrorBundle Text ()) [Lexeme]
556 runLexer inp = inp & P.runParser lexer "input"
557
558 exampleLiteralsLexemes :: [ExampleLiteral] -> [Lexeme]
559 exampleLiteralsLexemes ls =
560 ls & foldMap \ExampleLiteral{..} ->
561 unRuleLexemes (fromString (TextShort.unpack exampleLiteralText))
562 <> [ LexemeMeaning exampleLiteralMeaning
563 ]
564
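-- | Split raw text into Lexemes: a run of letters (optionally followed by
-- a "{meaning}" annotation) or of digits is wrapped in LexemeBorder
-- markers, while spaces, symbols, separators, marks and punctuation
-- become bare LexemeChars.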
565 lexer :: P.Parsec () Text [Lexeme]
566 lexer = do
567 lls <- P.many do
568 P.choice $
569 list
570 [ P.takeWhile1P Nothing Char.isSpace >>= \cs ->
571 return [LexemeChar c | c <- cs & Text.unpack]
572 , do
573 cs <- P.takeWhile1P Nothing Char.isLetter
574 mean <- (<|> return []) $ P.try do
575 P.single '{'
576 m <- P.takeWhile1P Nothing (/= '}')
577 P.single '}'
578 return [LexemeMeaning (TextShort.fromText m)]
579 return $
580 LexemeBorder
581 : [LexemeChar c | c <- cs & Text.unpack]
582 <> mean
583 <> [LexemeBorder]
584 , P.takeWhile1P Nothing Char.isNumber >>= \cs ->
585 return (LexemeBorder : ([LexemeChar c | c <- cs & Text.unpack] <> [LexemeBorder]))
586 , P.satisfy Char.isSymbol >>= \c ->
587 return [LexemeChar c]
588 , P.satisfy Char.isSeparator >>= \c ->
589 return [LexemeChar c]
590 , P.satisfy Char.isMark >>= \c ->
591 return [LexemeChar c]
592 , P.satisfy Char.isPunctuation >>= \c ->
593 return [LexemeChar c]
594 ]
595 P.eof
596 return $ mconcat lls
597
598 words :: [Either Char Pron] -> [[Either Char Pron]]
599 words [] = []
600 words prons = word0 : words next
601 where
602 (word0, rest) = prons & List.span (isSep >>> not)
603 (_sep, next) = rest & List.span isSep
604 isSep = \case
605 Left c | c & Char.isSpace -> True
606 _ -> False
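-- Schematically (pronA and pronB stand for arbitrary parsed rules):
--   words [Right pronA, Left ' ', Left ' ', Right pronB]
--     == [[Right pronA], [Right pronB]]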
607
608 data Input = Input
609 { inputText :: Text
610 , inputPhonetic :: [IPA.Syllable []]
611 , inputMeaning :: Maybe ShortText
612 }
613
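-- | A small hand-written pattern trie (entries are illustrative): each
-- level keys on the next context (a character, a lexical category or a
-- border), and a branch ends with the pronunciations to emit, e.g. "a"
-- before a word border maps to "ə", "the" before a space to "zi".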
614 patterns :: Map PatKey PatNode
615 patterns =
616 [ PatKeyNext (PatContextChar 'a') :=
617 [ PatKeyNext PatContextLexicalBorder :=
618 PatEnd ["a" := "ə"]
619 ]
620 , PatKeyNext (PatContextChar 't') :=
621 [ PatKeyNext (PatContextChar 'h') :=
622 [ PatKeyNext (PatContextChar 'e') :=
623 [ PatKeyNext (PatContextLexicalCategory Char.Space) :=
624 PatEnd ["the" := "zi"]
625 ]
626 ]
627 ]
628 ]
629
630 data State = State
631 { stateInput :: LZ.Zipper Inp
632 , stateBuffer :: [PatKey]
633 , statePats :: Map PatKey PatNode
634 , statePatReset :: Bool
635 }
636
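-- | Drive the pattern trie over the input, one Inp cell per character:
-- loop tries every pattern at the current position and resets the
-- pattern set when none matches, while match either descends into a
-- PatNode or, on PatEnd, inserts the matched keys and pronunciations
-- into the zipper.  Only the word-border case of look is handled so far.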
637 parse :: Map PatKey PatNode -> Text -> [Inp]
638 parse initPats input =
639 loop
640 State
641 { stateInput = input & Text.unpack & fmap charToInp & LZ.fromList
642 , stateBuffer = []
643 , statePats = initPats
644 , statePatReset = True
645 }
646 & stateInput
647 & LZ.toList
648 where
649 charToInp :: Char -> Inp
650 charToInp c =
651 Inp
652 { inpPats = [PatKeyNext (PatContextChar c)]
653 , inpPronunciations = []
654 }
655 loop :: State -> State
656 loop st =
657 [ look (key, val) st
658 | (key, val) <- st & statePats & Map.toList
659 ]
660 & catMaybes
661 & headMaybe
662 & fromMaybe (loop st{statePats = initPats})
663 look :: (PatKey, PatNode) -> State -> Maybe State
664 look kv@(key, val) st =
665 case key of
666 PatKeyPrev patPrev -> errorShow ("prev" :: Text)
667 PatKeyNext patNext ->
668 case patNext of
669 PatContextLexicalBorder
670 | stateInput st & LZ.endp -> match kv st
671 -- PatContextChar c
672 -- | Just inpNext <- stateInput st & LZ.safeCursor ->
673 -- inpNext & inpPats &
674 -- case of
675 -- Nothing ->
676 _ -> Nothing
677 match kv@(key, val) st =
678 case val of
679 PatEnd pron ->
680 Just
681 st
682 { statePats = initPats
683 , stateBuffer = []
684 , stateInput =
685 stateInput st
686 & LZ.insert
687 Inp
688 { inpPats = stateBuffer st & (key :) & List.reverse
689 , inpPronunciations = pron
690 }
691 }
692 PatNode pats ->
693 Just
694 st
695 { statePats = pats
696 , stateBuffer = key : stateBuffer st
697 }
698
699 {-
700 case statePats st & Map.lookup k of
701 Nothing -> st
702 Just (PatNode pats) ->
703 loop st {statePats = pats, stateBuffer = k : stateBuffer st}
704 Just (PatEnd end) ->
705 loop st { statePats = initPats
706 , stateBuffer = []
707 , stateInput = stateInput st
708 & LZ.insert Inp
709 { inpPats = stateBuffer st & List.reverse
710 , inpPronunciations = end
711 }
712 & LZ.right
713 }
714 -}
715
716 {-
717 parse :: PatNode -> Text -> [Inp]
718 parse initPats input =
719 let inpZip = input & Text.unpack & fmap charToInp & LZ.fromList in
720 runInp [] initPats inpZip & LZ.toList
721 where
722 charToInp :: Char -> Inp
723 charToInp c = Inp
724 { inpPats = [PatKeyNext (PatContextChar c)]
725 , inpPronunciations = []
726 }
727 runInp :: [PatKey] -> PatNode -> LZ.Zipper Inp -> LZ.Zipper Inp
728 runInp oks pat inp =
729 traceShow ("runInp"::Text, ("oks"::Text) := oks, ("cur"::Text) := LZ.safeCursor inp) $
730 case pat of
731 PatEnd end ->
732 -- the pattern ends
733 inp
734 & LZ.insert Inp
735 { inpPats = oks & List.reverse
736 , inpPronunciations = end
737 }
738 & LZ.right
739 & runInp [] initPats
740 PatNode pats ->
741 -- the pattern may go on
742 case inp & LZ.safeCursor of
743 Nothing ->
744 inp
745 & runPat [] oks [PatKeyNext PatContextLexicalBorder] pats
746 Just cur ->
747 inp & LZ.delete
748 & runPat [] oks (inpPats cur & List.sort) pats
749
750 runPat :: [PatKey] -> [PatKey] -> [PatKey] -> Map PatKey PatNode -> LZ.Zipper Inp -> LZ.Zipper Inp
751 runPat kos oks todos pats inp =
752 traceShow ( "runPat"::Text
753 , ("kos"::Text) := kos
754 , ("oks"::Text) := oks
755 , ("todos"::Text) := todos
756 , ("cur"::Text) :=LZ.safeCursor inp
757 ) $
758 case todos of
759 [] | LZ.endp inp -> inp
760 & runInp kos (PatEnd [])
761 & runInp oks (PatEnd [])
762 [] ->
763 -- nothing left to advance the pattern
764 --traceShow ("runPat/[]"::Text) $
765 inp
766 & (if null kos then id else runInp kos (PatEnd []))
767 & runInp oks (PatNode pats)
768 k:ks ->
769 case pats & Map.lookup k of
770 -- the pattern ends
771 Just (PatEnd end) ->
772 --traceShow ("runPat/End"::Text) $
773 -- ks forgotten
774 inp
775 & (if null kos then id else runInp kos (PatEnd []))
776 & runInp (k:oks) (PatEnd end)
777 -- the pattern advances
778 Just (PatNode nextPats) ->
779 --traceShow ("runPat/Node"::Text) $
780 inp & runPat kos (k:oks) ks nextPats
781 -- the pattern does not advance
782 Nothing ->
783 inp & runPat (k:kos) oks ks pats
784 -}
785
786 data PatContext
787 = PatContextChar Char
788 | PatContextLexicalCategory Char.GeneralCategory
789 | PatContextLexicalBorder
790 | PatContextPhoneticVowel
791 | PatContextPhoneticSemiVowel
792 | PatContextPhoneticConsonant
793 deriving (Eq, Ord, Show)
794
795 data Inp = Inp
796 { inpPats :: [PatKey]
797 , inpPronunciations :: Pronunciations
798 }
799 deriving (Show)
800
801 data PatKey
802 = PatKeyPrev PatContext
803 | PatKeyNext PatContext
804 deriving (Eq, Ord, Show)
805 data PatNode
806 = PatNode (Map PatKey PatNode)
807 | PatEnd Pronunciations
808 deriving (Show)
809 instance IsList PatNode where
810 type Item PatNode = (PatKey, PatNode)
811 fromList = PatNode . Map.fromListWith (errorShow)
812 toList = errorShow
813
814 data Lexeme
815 = LexemeBorder
816 | LexemeVowel
817 | LexemeSemiVowel
818 | LexemeConsonant
819 | LexemeDoubleConsonant
820 | LexemeSilent
821 | LexemeMeaning ShortText
822 | -- | `LexemeChar` is last so that it takes priority when using `Map.toDescList`
823 LexemeChar Char
824 deriving (Eq, Ord, Show)
825
826 -- data Sound
827 -- = SoundVowel
828 -- | SoundConsonant
829 -- deriving (Eq, Ord, Show)
830
831 {-
832 newtype Lexemes = Lexemes { unLexemes :: [Lexeme] }
833 deriving (Eq, Ord, Show)
834 instance P.Stream Lexemes where
835 type Token Lexemes = Lexeme
836 type Tokens Lexemes = Lexemes
837 tokensToChunk _px = Lexemes
838 chunkToTokens _px = unLexemes
839 chunkLength _px = unLexemes >>> List.length
840 chunkEmpty _px = unLexemes >>> List.null
841 take1_ = unLexemes >>> P.take1_ >>> coerce
842 takeN_ n = unLexemes >>> P.takeN_ n >>> coerce
843 takeWhile_ p = unLexemes >>> P.takeWhile_ p >>> coerce
844
845 instance IsString Lexemes where
846 fromString s =
847 s
848 & Text.pack
849 & runLexer
850 & either
851 errorShow
852 ((`appEndo` []) >>> Lexemes)
853 -}
854
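-- | Lexemes forming a rule pattern.  Unlike 'InputLexemes' below, the
-- IsString instance strips the LexemeBorder markers that the lexer puts
-- around a run of letters, so a bare pattern string does not implicitly
-- anchor to word borders (use word, begining or ending for that).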
855 newtype RuleLexemes = RuleLexemes {unRuleLexemes :: [Lexeme]}
856 deriving (Eq, Ord, Show)
857 instance HasTypeDefault RuleLexemes where
858 typeDefault = RuleLexemes typeDefault
859 instance Semigroup RuleLexemes where
860 RuleLexemes x <> RuleLexemes y = RuleLexemes (x <> y)
861 instance Monoid RuleLexemes where
862 mempty = RuleLexemes mempty
863 instance IsList RuleLexemes where
864 type Item RuleLexemes = Lexeme
865 fromList = RuleLexemes
866 toList = unRuleLexemes
867 instance IsString RuleLexemes where
868 fromString s =
869 s
870 & Text.pack
871 & runLexer
872 & either
873 errorShow
874 ( List.dropWhileEnd (== LexemeBorder)
875 >>> List.dropWhile (== LexemeBorder)
876 >>> RuleLexemes
877 )
878
879 newtype InputLexemes = InputLexemes {unInputLexemes :: [Lexeme]}
880 deriving (Eq, Ord, Show)
881 instance HasTypeDefault InputLexemes where
882 typeDefault = InputLexemes typeDefault
883 instance Semigroup InputLexemes where
884 InputLexemes x <> InputLexemes y = InputLexemes (x <> y)
885 instance Monoid InputLexemes where
886 mempty = InputLexemes mempty
887 instance IsList InputLexemes where
888 type Item InputLexemes = Lexeme
889 fromList = InputLexemes
890 toList = unInputLexemes
891 instance IsString InputLexemes where
892 fromString s =
893 s
894 & Text.pack
895 & runLexer
896 & either errorShow InputLexemes
897
898 instance P.ShowErrorComponent () where
899 showErrorComponent = show
900 errorComponentLen _ = 2
901 instance P.VisualStream [Lexeme] where
902 showTokens _s = show
903 tokensLength _s xs = xs <&> (show >>> List.length) & sum
904 instance P.TraversableStream [Lexeme] where
905 reachOffset off pos = (Nothing, pos{P.pstateOffset = P.pstateOffset pos + off})
906
907 data LexemeTag
908 = LexemeTagLetter
909 | LexemeTagSpace
910 | LexemeTagPunctuation
911 | LexemeTagSeparator
912 | LexemeTagMark
913 | LexemeTagSymbol
914 | LexemeTagDefinition
915 | LexemeTagBorder
916 deriving (Eq, Ord, Show)
917 deriving instance Ord (IPA.Syllable [])
918 deriving instance Ord IPA.SuprasegmentalFeature
919 deriving instance Ord IPA.SegmentalFeature
920 deriving instance Ord IPA.Sibilance
921 deriving instance Ord IPA.Manner
922 deriving instance Ord IPA.Phonation
923 deriving instance Ord IPA.Roundedness
924 deriving instance Ord IPA.Height
925 deriving instance Ord IPA.Vowel
926 deriving instance Ord IPA.Consonant
927 deriving instance Ord IPA.Segment
928
929 {-
930 tableToMatch :: Table -> [Lexeme] -> [Pronunciations]
931 tableToMatch tbl = loop
932 where
933 loop prevBorder = \case
934 InputText inp ->
935 [ (matchingLength, )
936 | (trans, transMach) <- chunk & chunkMachine & machineAlts & Map.toList
937 , let matchingLength = transMatchingLength input trans
938 , 0 < matchingLength || not (isTransConsume trans)
939 , let (inputRead, inputRest) = input & Text.splitAt matchingLength
940 ]
941 & Map.fromListWith (\new old -> old)
942 & Map.lookupMax
943 <&> snd
944
945 -}
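-- | Render the rule table as printable HTML: rules are laid out on
-- sheets of 50 entries, each entry showing the pattern characters, its
-- broad IPA pronunciation (omitted when entirely silent) and its example
-- pronunciations.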
946 tableHtml :: Table -> IO HTML.Html
947 tableHtml tbl = do
948 dataPath <- Self.getDataDir <&> File.normalise
949 let title :: String = "LexerDict"
950 let pageOrientation = Paper.PageOrientationPortrait
951 let pageSize = Paper.PageSizeA4
952 let partLangue = LangueFrançais
953 return do
954 HTML.docTypeHtml do
955 HTML.head do
956 HTML.title $ title & HTML.toHtml
957 forM_
958 ( [ "styles/Paper.css"
959 , "styles/French/Lexer.css"
960 , "styles/Rosetta/Reading.css"
961 ]
962 & list
963 )
964 \cssFile ->
965 HTML.link
966 ! HA.rel "stylesheet"
967 ! HA.type_ "text/css"
968 ! HA.href (dataPath </> cssFile & HTML.toValue)
969 HTML.styleCSS $ HTML.cssPrintPage pageOrientation pageSize
970 -- HTML.styleCSS $ pageDifficulties & difficultyCSS
971 HTML.body
972 ! classes ["A4", "french-lexer"]
973 $ do
974 "\n"
975 let rulesChunks = tbl & Map.toList & chunksOf 50
976 forM_ rulesChunks \rules ->
977 HTML.section
978 ! classes
979 [ "sheet"
980 ]
981 ! styles
982 []
983 $ do
984 forM_ (rules & List.zip [1 :: Int ..]) \(ruleIndex, (rulePat, Rule{..})) -> do
985 "\n"
986 HTML.div
987 ! classes
988 [ "dict-entry"
989 , if even ruleIndex then "even" else "odd"
990 ]
991 ! styles
992 []
993 $ do
994 "\n"
995 HTML.div
996 ! classes
997 [ "dict-key"
998 , "lang-" <> className partLangue
999 ]
1000 ! styles
1001 []
1002 -- "grid-template-columns" :=
1003 -- (0.5 & cm & HTML.toCSS)
1004 -- & List.replicate lexerDictMaxKeyLength
1005 -- & List.unwords
1006
1007 $ do
1008 forM_ (["model"] :: [String]) \rowKind -> do
1009 forM_ (rulePat & unRuleLexemes) \ruleChar -> do
1010 -- let uniScript = cellToken & tokenMeta & snd & fromMaybe (UnicodeBlockLatin UnicodeBlockLatin_Basic)
1011 case ruleChar of
1012 LexemeChar c -> do
1013 HTML.span
1014 ! classes
1015 [ "dict-key-cell"
1016 , rowKind
1017 -- , "script-" <> className uniScript
1018 ]
1019 $ do
1020 c & HTML.toHtml
1021 _ -> ""
1022 HTML.div
1023 ! classes
1024 [ "dict-pronunciation"
1025 ]
1026 $ do
1027 -- HTML.span ! classes ["arrow"] $ "→"
1028 case rulePron of
1029 Pronunciations
1030 { unPronunciations =
1031 all (snd >>> pronunciationIPABroad >>> all IPA.isSilent) -> True
1032 } -> ""
1033 Pronunciations{unPronunciations = is} ->
1034 is
1035 & foldMap (snd >>> pronunciationIPABroad >>> foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic))
1036 & HTML.toHtml
1037 HTML.div
1038 ! classes
1039 [ "dict-lexeme"
1040 ]
1041 $ do
1042 -- HTML.span ! classes ["arrow"] $ "→"
1043 forM_ (ruleExamples & Map.toList) \(_inp, Pronunciation{..}) -> do
1044 HTML.span
1045 ! classes
1046 []
1047 $ do
1048 case pronunciationIPABroad of
1049 [] -> pronunciationText & HTML.toHtml
1050 _ -> pronunciationIPABroad & foldMap (IPA.toIPA_ >>> IPA.transcribe IPA.Phonemic) & HTML.toHtml
1051 "; "