1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE ImportQualifiedPost #-}
4 module Language.PronunciationSpec where
6 import Data.ByteString.Builder qualified as ByteString.Builder
7 import Data.GenValidity.Map ()
8 import Data.GenValidity.Sequence ()
9 import Data.GenValidity.Set ()
10 import Data.GenValidity.Text ()
11 import Data.List qualified as List
12 import Data.Map.Strict qualified as Map
13 import Data.Set qualified as Set
14 import Data.Text qualified as Text
15 import Data.Text.Encoding qualified as Text
16 import Data.Text.Lazy.Encoding qualified as Text.Lazy
17 import Data.Text.Short qualified as ShortText
18 import Data.Validity.Map ()
19 import Data.Validity.Set ()
20 import Data.Validity.Text ()
22 import Language.French
23 import Language.French qualified as French
24 import Language.English
25 import Language.English qualified as English
26 import Language.Pronunciation qualified as Pron
27 import Paths_worksheets qualified as Self
28 import System.Directory qualified as IO
29 import System.FilePath (joinPath, pathSeparator, (<.>), (</>))
30 import System.FilePath.Posix qualified as File
31 import System.IO qualified as IO
33 import Text.Blaze.Html5.Attributes qualified as HA
34 import Wiktionary qualified
35 import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!))
36 import Worksheets.Utils.HTML qualified as HTML
37 import Worksheets.Utils.IPA qualified as IPA
38 import Worksheets.Utils.Paper qualified as Paper
39 import Worksheets.Utils.Prelude
40 import Worksheets.Utils.SQL qualified as SQL
41 import Prelude (error, even, mod)
44 cardsHtml :: String -> IO Html -> TestDefM (outers) () ()
45 cardsHtml title html = do
46 outPath <- goldenPath title "html"
47 builder <- html <&> HTML.renderMarkupBuilder & liftIO
49 goldenByteStringBuilderFile outPath (return builder)
51 pronunciationDictTest dict = do
53 outPath <- goldenPath title "html"
54 builder <- dict & Pron.tableHtml <&> HTML.renderMarkupBuilder & liftIO
56 goldenByteStringBuilderFile outPath (return builder)
58 pronunciationDecompositionTest :: String -> TestDefM (outers) () ()
59 pronunciationDecompositionTest sentence = do
60 outPath <- goldenPath sentence "txt"
62 pureGoldenByteStringBuilderFile outPath
63 $ case sentence & Text.pack & Pron.lexerPron French.pronunciationDict of
65 x & foldMap \(key, lex) ->
71 & ByteString.Builder.byteString
72 err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
74 spec :: HasCallStack => Spec
77 pronunciationDictTest $ English.pronunciationDict
79 pronunciationDictTest $ French.pronunciationDict
80 describe "textToLexeme" do
81 forM_ sentences \sentence ->
82 pronunciationDecompositionTest $ sentence & Text.unpack
85 around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do
86 outDB <- goldenPath "wiktionary" "sqlite"
87 outDBExists <- IO.doesFileExist outDB & liftIO
89 IO.removeFile outDB & liftIO
92 SQL.withConnection outDB \outConn ->
96 it "check-against-wiktionary" \(wiktConn, outConn) -> do
97 SQL.execute_ outConn $ "PRAGMA journal_mode = OFF"
98 SQL.execute_ outConn $ "PRAGMA synchronous = OFF"
99 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)"
100 SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);"
101 SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);"
102 SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);"
103 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
106 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000"
111 , maybeWord :: Maybe Text
112 , sounds :: [Wiktionary.Sound]
117 case exprLit & Pron.lexerPron French.pronunciationDict of
121 "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
127 let synthExplain = lexemes <&> (\(key, lex) -> key <> " → " <> lex) & Text.intercalate "; "
128 let synthBroad = lexemes & foldMap snd
129 forM_ sounds \Wiktionary.Sound{..} -> do
130 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
132 Just (IPA.IPAPhons exprIPAs) -> do
134 IPA.IPAPhonemic exprBroad -> do
135 when (i `mod` 5000 == 0) do
136 IO.hPrint IO.stderr (i, exprLit, exprBroad)
139 "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);"
141 , exprBroad & IPA.ipaWordsToText Nothing
142 , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing
146 IPA.IPAPhonError (_errMsg, _err) -> return ()
153 mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]]
154 mangleSupraSegmentalFeatures = \case
156 IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts
157 IPA.WithSuprasegmentalFeature IPA.Linking syl : ts ->
158 case mangleSupraSegmentalFeatures ts of
159 [] -> [dropSupraSegmentalFeatures syl]
160 x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs
161 IPA.WithSuprasegmentalFeature _feat syl : xs ->
162 mangleSupraSegmentalFeatures (syl : xs)
165 -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML
177 $ French.pronunciationDict
183 & foldMap (Pron.literalText >>> ShortText.toText)