tests/Language/FrenchSpec.hs

   1 {-# LANGUAGE OverloadedLists #-}
   2 {-# LANGUAGE OverloadedStrings #-}
   3
   4 module Language.FrenchSpec where
   5
   6 import Data.ByteString.Builder qualified as ByteString.Builder
   7 import Data.GenValidity.Map ()
   8 import Data.GenValidity.Sequence ()
   9 import Data.GenValidity.Set ()
  10 import Data.GenValidity.Text ()
  11 import Data.List qualified as List
  12 import Data.Map.Strict qualified as Map
  13 import Data.Set qualified as Set
  14 import Data.Text qualified as Text
  15 import Data.Text.Encoding qualified as Text
  16 import Data.Text.Lazy.Encoding qualified as Text.Lazy
  17 import Data.Text.Short qualified as ShortText
  18 import Data.Validity.Map ()
  19 import Data.Validity.Set ()
  20 import Data.Validity.Text ()
  21 import Language
  22 import Language.French qualified as French
  23 import Language.Pronunciation (ExampleLiteral (..), Lexeme (..), Rule (..), RuleLexemes, after, before, begining, ending, meaning, occurence, rule, silent, word)
  24 import Language.Pronunciation qualified as Pron
  25 import Paths_worksheets qualified as Self
  26 import System.Directory qualified as IO
  27 import System.FilePath (joinPath, pathSeparator, (<.>), (</>))
  28 import System.FilePath.Posix qualified as File
  29 import System.IO qualified as IO
  30 import Test.Syd
  31 import Text.Blaze.Html5.Attributes qualified as HA
  32 import Utils.Pronunciation qualified as Pron
  33 import Utils.Tests
  34 import Wiktionary qualified
  35 import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!))
  36 import Worksheets.Utils.HTML qualified as HTML
  37 import Worksheets.Utils.IPA qualified as IPA
  38 import Worksheets.Utils.Paper qualified as Paper
  39 import Worksheets.Utils.Prelude
  40 import Worksheets.Utils.SQL qualified as SQL
  41 import Prelude (error, even, mod)
  42
  43 {-
  44 instance IsString Rule where
  45   fromString s = rule {ruleLexemes = fromString s}
  46   [ ful "de" := single "də" ["de" & occurence]
  47   , inf "'" := single "" []
  48   , inf "-" := single "" []
  49   , inf "," := single "" []
  50   , inf "?" := single "" []
  51   , inf "!" := single "" []
  52   , inf borderLeftText := single "" []
  53   , inf borderRightText := single "" []
  54
  55   , inf "demi" := single "dəmi" ["demi" & occurence]
  56   , -- , inf "amb" := single "ɑ̃b" ["amb" & occurence, "ulance"]
  57     -- , inf "amp" := single "ɑ̃p" ["amp" & occurence, "oule"]
  58     -- , inf "amph" := single "ɑ̃f" ["amph" & occurence, "ithéâtre"]
  59     -- , inf "an" := multi
  60     --    [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["pl", "an" & occurence]}
  61     --    , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation="an", lexemeExample=["an" & occurence, "imal"]}
  62     --    , [PronContextBeforeAny] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["an" & occurence, "tilope"]}
  63     --    ]
  64   , -- , inf "coin" := single "kwɛ̃" ["coin" & occurence, "cer"]
  65
  66   , -- , inf "im"
  67     --    := multi
  68     --      [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation = "", lexemeExample = }
  69     --      , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "im", lexemeExample = ["im" & occurence, "ortel"]}
  70     --      , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛ̃", lexemeExample = ["im" & occurence, "primer"]}
  71     --      ]
  72   , -- , inf "in" := single "ɛ̃" ["merl", "in" & occurence]
  73     -- , inf "ina" := single "ina" ["ord", "ina" & occurence, "teur"]
  74     -- , inf "ine" := single "in" ["rout", "ine" & occurence]
  75     -- , inf "inent" := single "in" ["dess", "inent" & occurence]
  76     -- , inf "iner" := single "inɛʁ" ["iner" & occurence, "tie"]
  77     -- , inf "inex" := single "inɛgz" ["inex" & occurence, "istant"]
  78     -- , inf "inexp" := single "inɛksp" ["inexp" & occurence, "licable"]
  79     -- , inf "inima" := single "inima" ["inima" & occurence, "ginable"]
  80     -- , inf "inimi" := single "inimi" ["inimi" & occurence, "table"]
  81     -- , inf "inimp" := single "inimp" ["inimp" & occurence, "ortant"]
  82     -- , inf "ininf" := single "inɛ̃" ["ininf" & occurence, "lammation"]
  83     -- , inf "inint" := single "inɛ̃t" ["inint" & occurence, "éressant"]
  84     -- , inf "inn" := single "in" ["inn" & occurence, "ocent"]
  85     -- , inf "ino" := single "ino" ["ino" & occurence, "dore"]
  86     -- , inf "inu" := single "inu" ["inu" & occurence, "tile"]
  87     -- , inf "ix" := single "iks" ["phén", "ix" & occurence]
  88     -- , inf "gine" := single "ʒin" ["an", "gine" & occurence]
  89   -- , inf "era" := single "əʁa" ["s", "era" & occurence]
  90   -- , inf "erai" := single "əʁɛ" ["s", "erai" & occurence]
  91   , inf "era" :=
  92       multi
  93         [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "əʁ", lexemeExample = ["mer" & occurence, "ingue"]}
  94         , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛʁ", lexemeExample = ["mer" & occurence, "le"]}
  95         ]
  96   , inf "erin" := single "əʁɛ̃" ["m", "erin" & occurence, "gue"]
  97   , inf "s" :=
  98       multi
  99         [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["ti", "s" & occurence, "ane"]}
 100         , [PronContextBeforeAnySemiVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["pari", "s" & occurence, "ienne"]}
 101         , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "s", lexemeExample = ["s", "a" & occurence]}
 102         ]
 103   , -- , inf "shirt" := single "ʃœʁt" ["shirt" & occurence]
 104   , -- , inf "teau" := single "to" ["teau" & occurence]
 105     -- , inf "ti" := single "ti" ["ti" & occurence, "gre"]
 106   , pre "ukulele" := single "jukulele" ["ukulele" & occurence]
 107   , -- , inf "ui" := single "ɥi" ["ling", "ui" & occurence, "stique"]
 108   , pre "abrivent" := single "abʁivɑ̃" ["abrivent" & occurence]
 109   , pre "adjuvent" := single "adʒyvɑ̃" ["adjuvent" & occurence]
 110   , pre "antivent" := single "ɑ̃tivɑ̃" ["antivent" & occurence]
 111   , pre "auvent" := single "ovɑ̃" ["auvent" & occurence]
 112   , pre "avent" := single "avɑ̃" ["avent" & occurence]
 113   , pre "aï" := single "ai" ["c", "aï" & occurence, "man"]
 114   , pre "bavent" := single "bavɑ̃" ["bavent" & occurence]
 115   , pre "boutavent" := single "butavɑ̃" ["boutavent" & occurence]
 116   , pre "bouvent" := single "buvɑ̃" ["bouvent" & occurence]
 117   , pre "bénévent" := single "benevɑ̃" ["bénévent" & occurence]
 118   , pre "connivent" := single "konivɑ̃" ["connivenr" & occurence]
 119   , pre "contrevent" := single "kɔ̃tʁəvɑ̃" ["contrevent" & occurence]
 120   , pre "convent" := single "kɔ̃vɑ̃" ["convent" & occurence]
 121   , pre "couvent" := single "kuvɑ̃" ["couvent" & occurence]
 122   , pre "couvents" := single "kuvɑ̃" ["couvents" & occurence]
 123   , pre "engoulevent" := single "ɑ̃gulvɑ̃" ["engoulevent" & occurence]
 124   , pre "fervent" := single "fɛʁvɑ̃" ["fervent" & occurence]
 125   , pre "frévent" := single "fʁevɑ̃" ["frévent" & occurence]
 126   , pre "heurtevent" := single "œʁtəvɑ̃" ["heurtevent" & occurence]
 127   , pre "her" := single "ɛʁ" ["her" & occurence, "be"]
 128   , pre "hi" := single "i" ["hi" & occurence, "ver"]
 129   , pre "ill" := single "il" ["ill" & occurence, "étrisme"]
 130   , pre "jack" := single "dʒak" ["jack" & occurence]
 131   , pre "jazz" := single "dʒaz" ["jazz" & occurence]
 132   , pre "montsurvent" := single "mɔ̃syʁvɑ̃" ["montsurvent" & occurence]
 133   , -- , pre "ni" := single "ni" ["ni" & occurence, "ais"]
 134     pre "niai" := single "njɛ" ["niai" & occurence, "s"]
 135   , pre "paravent" := single "paʁavɑ̃" ["paravent" & occurence]
 136   , pre "sen" :=
 137       multi
 138         [ [PronContextBeforeAny] := LexemePron{lexemePronunciation = "sɑ̃", lexemeExample = ["sen" & occurence, "t"]}
 139         , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "sen", lexemeExample = ["sen" & occurence, "a"]}
 140         -- , inf "a" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "a"]}
 141         -- , inf "e" := LexemePron{lexemePronunciation="sən", lexemeExample=["sen" & occurence, "estre"]}
 142         -- , inf "i" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "ior"]}
 143         -- , inf "o" := LexemePron{lexemePronunciation="sen", lexemeExample=["Sen" & occurence, "oueix"]}
 144         -- , inf "u" := LexemePron{lexemePronunciation="sen", lexemeExample=[]}
 145         ]
 146   , pre "souvent" := single "suvɑ̃" ["souvent" & occurence]
 147   , pre "taillevent" := single "tɑjvɑ̃" ["taillevent" & occurence]
 148   , pre "tournevent" := single "tuʁnəvɑ̃" ["tournevent" & occurence]
 149   , pre "vent" := single "vɑ̃" ["vent" & occurence]
 150   , pre "virevent" := single "viʁvɑ̃" ["virevent" & occurence]
 151   , pre "volvent" := single "vɔlvɑ̃" ["volvent" & occurence]
 152   , pre "évent" := single "evɑ̃" ["évent" & occurence]
 153   , pre "œ" := single "œ" ["œ" & occurence, "uf"]
 154   , pre "fier" := single "fjɛʁ" ["fier" & occurence]
 155   , pre "tier" := single "tjɛʁ" ["tier" & occurence]
 156
 157   ]
 158     & list
 159     & foldMap (\(ks, v) -> [(k, v) | k <- ks])
 160     & mapFromListCheckingDuplicates
 161   where
 162     single pron exs =
 163       Map.singleton PronContextBeforeAny $ LexemePron{lexemePronunciation = pron, lexemeExample = exs}
 164     multi l =
 165       l
 166         & list
 167         & foldMap (\(ks, v) -> [(k, v) | k <- ks])
 168         & mapFromListCheckingDuplicates
 169 -}
 170
 171 spec :: HasCallStack => Spec
 172 spec = do
 173   -- Pron.lexerRender $ French.pronunciationDict
 174   describe "pronunciation" do
 175     -- describe "decomposition" do
 176     --  forM_ sentences \sentence ->
 177     --    Pron.pronunciationDecompositionTest French.pronunciationDict $ sentence & Text.unpack
 178     -- describe "result" do
 179     --  forM_ sentences \sentence ->
 180     --    Pron.pronunciationResultTest French.pronunciationDict $ sentence & Text.unpack
 181     describe "parser" do
 182       forM_ sentences \sentence ->
 183         Pron.pronunciationParserTest French.pronunciationRules sentence
 184
 185 {-
 186 withoutRetries do
 187   withoutTimeout do
 188     around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do
 189       outDB <- goldenPath "wiktionary" "sqlite"
 190       outDBExists <- IO.doesFileExist outDB & liftIO
 191       when (outDBExists) do
 192         IO.removeFile outDB & liftIO
 193       aroundWith
 194         ( \k wiktConn -> do
 195             SQL.withConnection outDB \outConn ->
 196               k (wiktConn, outConn)
 197         )
 198         do
 199           it "check-against-wiktionary" \(wiktConn, outConn) -> do
 200             SQL.execute_ outConn $ "PRAGMA journal_mode = OFF"
 201             SQL.execute_ outConn $ "PRAGMA synchronous = OFF"
 202             SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)"
 203             SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);"
 204             SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);"
 205             SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);"
 206             SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
 207             SQL.fold
 208               wiktConn
 209               "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000"
 210               ()
 211               (1 :: Int)
 212               \ !i
 213                 ( exprId :: Int
 214                   , maybeWord :: Maybe Text
 215                   , sounds :: [Wiktionary.Sound]
 216                   ) -> do
 217                   case maybeWord of
 218                     Nothing -> return ()
 219                     Just exprLit ->
 220                       case exprLit & Pron.run French.pronunciationRules of
 221                         Left err -> do
 222                           SQL.execute
 223                             outConn
 224                             "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
 225                             ( exprId
 226                             , exprLit
 227                             , err & pShowNoColor
 228                             )
 229                         Right lexemes -> do
 230                           let synthExplain = lexemes <&> (\Pron.Pron{pronInput, pronRule} -> Pron.chars pronInput <> " → " <> show (Pron.rulePron pronRule)) & List.intercalate "; "
 231                           let synthBroad = lexemes & foldMap (Pron.pronRule >>> Pron.rulePron >>> Pron.pronunciationText)
 232                           forM_ sounds \Wiktionary.Sound{..} -> do
 233                             -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
 234                             case sound_ipa of
 235                               Just (IPA.IPAPhons exprIPAs) -> do
 236                                 forM_ exprIPAs \case
 237                                   IPA.IPAPhonemic exprBroad -> do
 238                                     when (i `mod` 5000 == 0) do
 239                                       IO.hPrint IO.stderr (i, exprLit, exprBroad)
 240                                     SQL.execute
 241                                       outConn
 242                                       "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);"
 243                                       ( exprLit
 244                                       , exprBroad & IPA.ipaWordsToText Nothing
 245                                       , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing
 246                                       , synthBroad
 247                                       , synthExplain
 248                                       ) -- & traceShowId
 249                                   IPA.IPAPhonError (_errMsg, _err) -> return ()
 250                                   _ -> return ()
 251                               _ -> return ()
 252                   return (i + 1)
 253             return @IO ()
 254 -}
 255
 256 {-
 257 mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]]
 258 mangleSupraSegmentalFeatures = \case
 259   [] -> []
 260   IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts
 261   IPA.WithSuprasegmentalFeature IPA.Linking syl : ts ->
 262     case mangleSupraSegmentalFeatures ts of
 263       [] -> [dropSupraSegmentalFeatures syl]
 264       x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs
 265   IPA.WithSuprasegmentalFeature _feat syl : xs ->
 266     mangleSupraSegmentalFeatures (syl : xs)
 267 -}
 268
 269 -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML
 270
 271 sentences :: Set [ExampleLiteral]
 272 sentences =
 273   [
 274     [ ["aiguille"]
 275     , ["ail"]
 276     , ["aimer"]
 277     , ["ampoules"]
 278     , ["couvent"{exampleLiteralMeaning = "couver"}]
 279     , ["couvent"{exampleLiteralMeaning = "monastère"}]
 280     , ["papillon"]
 281     ]
 282   , French.pronunciationRules
 283       & Pron.examples
 284   ]
 285     & mconcat