]> Git — Sourcephile - julm/worksheets.git/blob - tests/Language/PronunciationSpec.hs
update
[julm/worksheets.git] / tests / Language / PronunciationSpec.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE ImportQualifiedPost #-}
3
4 module Language.PronunciationSpec where
5
6 import Data.ByteString.Builder qualified as ByteString.Builder
7 import Data.GenValidity.Map ()
8 import Data.GenValidity.Sequence ()
9 import Data.GenValidity.Set ()
10 import Data.GenValidity.Text ()
11 import Data.List qualified as List
12 import Data.Map.Strict qualified as Map
13 import Data.Set qualified as Set
14 import Data.Text qualified as Text
15 import Data.Text.Encoding qualified as Text
16 import Data.Text.Lazy.Encoding qualified as Text.Lazy
17 import Data.Text.Short qualified as ShortText
18 import Data.Validity.Map ()
19 import Data.Validity.Set ()
20 import Data.Validity.Text ()
21 import Language
22 import Language.French
23 import Language.French qualified as French
24 import Language.English
25 import Language.English qualified as English
26 import Language.Pronunciation qualified as Pron
27 import Paths_worksheets qualified as Self
28 import System.Directory qualified as IO
29 import System.FilePath (joinPath, pathSeparator, (<.>), (</>))
30 import System.FilePath.Posix qualified as File
31 import System.IO qualified as IO
32 import Test.Syd
33 import Text.Blaze.Html5.Attributes qualified as HA
34 import Wiktionary qualified
35 import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!))
36 import Worksheets.Utils.HTML qualified as HTML
37 import Worksheets.Utils.IPA qualified as IPA
38 import Worksheets.Utils.Paper qualified as Paper
39 import Worksheets.Utils.Prelude
40 import Worksheets.Utils.SQL qualified as SQL
41 import Prelude (error, even, mod)
42 import Utils.Tests
43
44 cardsHtml :: String -> IO Html -> TestDefM (outers) () ()
45 cardsHtml title html = do
46 outPath <- goldenPath title "html"
47 builder <- html <&> HTML.renderMarkupBuilder & liftIO
48 it title do
49 goldenByteStringBuilderFile outPath (return builder)
50
51 pronunciationDictTest dict = do
52 let title = "lexer"
53 outPath <- goldenPath title "html"
54 builder <- dict & Pron.tableHtml <&> HTML.renderMarkupBuilder & liftIO
55 it title do
56 goldenByteStringBuilderFile outPath (return builder)
57
58 pronunciationDecompositionTest :: String -> TestDefM (outers) () ()
59 pronunciationDecompositionTest sentence = do
60 outPath <- goldenPath sentence "txt"
61 it sentence do
62 pureGoldenByteStringBuilderFile outPath
63 $ case sentence & Text.pack & Pron.lexerPron French.pronunciationDict of
64 Right x ->
65 x & foldMap \(key, lex) ->
66 key
67 <> " → "
68 <> lex
69 <> "\n"
70 & Text.encodeUtf8
71 & ByteString.Builder.byteString
72 err@Left{} -> pShowNoColor err & Text.Lazy.encodeUtf8 & ByteString.Builder.lazyByteString
73
74 spec :: HasCallStack => Spec
75 spec = do
76 describe "English" do
77 pronunciationDictTest $ English.pronunciationDict
78 describe "French" do
79 pronunciationDictTest $ French.pronunciationDict
80 describe "textToLexeme" do
81 forM_ sentences \sentence ->
82 pronunciationDecompositionTest $ sentence & Text.unpack
83 withoutRetries do
84 withoutTimeout do
85 around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do
86 outDB <- goldenPath "wiktionary" "sqlite"
87 outDBExists <- IO.doesFileExist outDB & liftIO
88 when (outDBExists) do
89 IO.removeFile outDB & liftIO
90 aroundWith
91 ( \k wiktConn -> do
92 SQL.withConnection outDB \outConn ->
93 k (wiktConn, outConn)
94 )
95 do
96 it "check-against-wiktionary" \(wiktConn, outConn) -> do
97 SQL.execute_ outConn $ "PRAGMA journal_mode = OFF"
98 SQL.execute_ outConn $ "PRAGMA synchronous = OFF"
99 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)"
100 SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);"
101 SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);"
102 SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);"
103 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
104 SQL.fold
105 wiktConn
106 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000"
107 ()
108 (1 :: Int)
109 \ !i
110 ( exprId :: Int
111 , maybeWord :: Maybe Text
112 , sounds :: [Wiktionary.Sound]
113 ) -> do
114 case maybeWord of
115 Nothing -> return ()
116 Just exprLit ->
117 case exprLit & Pron.lexerPron French.pronunciationDict of
118 Left err -> do
119 SQL.execute
120 outConn
121 "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
122 ( exprId
123 , exprLit
124 , err & pShowNoColor
125 )
126 Right lexemes -> do
127 let synthExplain = lexemes <&> (\(key, lex) -> key <> " → " <> lex) & Text.intercalate "; "
128 let synthBroad = lexemes & foldMap snd
129 forM_ sounds \Wiktionary.Sound{..} -> do
130 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
131 case sound_ipa of
132 Just (IPA.IPAPhons exprIPAs) -> do
133 forM_ exprIPAs \case
134 IPA.IPAPhonemic exprBroad -> do
135 when (i `mod` 5000 == 0) do
136 IO.hPrint IO.stderr (i, exprLit, exprBroad)
137 SQL.execute
138 outConn
139 "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);"
140 ( exprLit
141 , exprBroad & IPA.ipaWordsToText Nothing
142 , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing
143 , synthBroad
144 , synthExplain
145 ) -- & traceShowId
146 IPA.IPAPhonError (_errMsg, _err) -> return ()
147 _ -> return ()
148 _ -> return ()
149 return (i + 1)
150 return @IO ()
151
152 {-
153 mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]]
154 mangleSupraSegmentalFeatures = \case
155 [] -> []
156 IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts
157 IPA.WithSuprasegmentalFeature IPA.Linking syl : ts ->
158 case mangleSupraSegmentalFeatures ts of
159 [] -> [dropSupraSegmentalFeatures syl]
160 x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs
161 IPA.WithSuprasegmentalFeature _feat syl : xs ->
162 mangleSupraSegmentalFeatures (syl : xs)
163 -}
164
165 -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML
166
167 sentences :: [Text]
168 sentences =
169 ( [ "ampoules"
170 , "papillon"
171 , "aiguille"
172 , "aimer"
173 , "ail"
174 ]
175 <>
176 )
177 $ French.pronunciationDict
178 & Pron.lexerDictMap
179 & Map.elems
180 <&> \lex ->
181 lex
182 & Pron.lexemeExample
183 & foldMap (Pron.literalText >>> ShortText.toText)