]> Git — Sourcephile - julm/worksheets.git/blob - tests/Language/FrenchSpec.hs
update
[julm/worksheets.git] / tests / Language / FrenchSpec.hs
1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3
4 module Language.FrenchSpec where
5
6 import Data.ByteString.Builder qualified as ByteString.Builder
7 import Data.GenValidity.Map ()
8 import Data.GenValidity.Sequence ()
9 import Data.GenValidity.Set ()
10 import Data.GenValidity.Text ()
11 import Data.List qualified as List
12 import Data.Map.Strict qualified as Map
13 import Data.Set qualified as Set
14 import Data.Text qualified as Text
15 import Data.Text.Encoding qualified as Text
16 import Data.Text.Lazy.Encoding qualified as Text.Lazy
17 import Data.Text.Short qualified as ShortText
18 import Data.Validity.Map ()
19 import Data.Validity.Set ()
20 import Data.Validity.Text ()
21 import Language
22 import Language.French qualified as French
23 import Language.Pronunciation (ExampleLiteral (..), Lexeme (..), Rule (..), RuleLexemes, after, before, begining, ending, meaning, occurence, rule, silent, word)
24 import Language.Pronunciation qualified as Pron
25 import Paths_worksheets qualified as Self
26 import System.Directory qualified as IO
27 import System.FilePath (joinPath, pathSeparator, (<.>), (</>))
28 import System.FilePath.Posix qualified as File
29 import System.IO qualified as IO
30 import Test.Syd
31 import Text.Blaze.Html5.Attributes qualified as HA
32 import Utils.Pronunciation qualified as Pron
33 import Utils.Tests
34 import Wiktionary qualified
35 import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!))
36 import Worksheets.Utils.HTML qualified as HTML
37 import Worksheets.Utils.IPA qualified as IPA
38 import Worksheets.Utils.Paper qualified as Paper
39 import Worksheets.Utils.Prelude
40 import Worksheets.Utils.SQL qualified as SQL
41 import Prelude (error, even, mod)
42
43 {-
44 instance IsString Rule where
45 fromString s = rule {ruleLexemes = fromString s}
46 [ ful "de" := single "də" ["de" & occurence]
47 , inf "'" := single "" []
48 , inf "-" := single "" []
49 , inf "," := single "" []
50 , inf "?" := single "" []
51 , inf "!" := single "" []
52 , inf borderLeftText := single "" []
53 , inf borderRightText := single "" []
54
55 , inf "demi" := single "dəmi" ["demi" & occurence]
56 , -- , inf "amb" := single "ɑ̃b" ["amb" & occurence, "ulance"]
57 -- , inf "amp" := single "ɑ̃p" ["amp" & occurence, "oule"]
58 -- , inf "amph" := single "ɑ̃f" ["amph" & occurence, "ithéâtre"]
59 -- , inf "an" := multi
60 -- [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["pl", "an" & occurence]}
61 -- , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation="an", lexemeExample=["an" & occurence, "imal"]}
62 -- , [PronContextBeforeAny] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["an" & occurence, "tilope"]}
63 -- ]
64 , -- , inf "coin" := single "kwɛ̃" ["coin" & occurence, "cer"]
65
66 , -- , inf "im"
67 -- := multi
68 -- [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation = "", lexemeExample = }
69 -- , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "im", lexemeExample = ["im" & occurence, "ortel"]}
70 -- , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛ̃", lexemeExample = ["im" & occurence, "primer"]}
71 -- ]
72 , -- , inf "in" := single "ɛ̃" ["merl", "in" & occurence]
73 -- , inf "ina" := single "ina" ["ord", "ina" & occurence, "teur"]
74 -- , inf "ine" := single "in" ["rout", "ine" & occurence]
75 -- , inf "inent" := single "in" ["dess", "inent" & occurence]
76 -- , inf "iner" := single "inɛʁ" ["iner" & occurence, "tie"]
77 -- , inf "inex" := single "inɛgz" ["inex" & occurence, "istant"]
78 -- , inf "inexp" := single "inɛksp" ["inexp" & occurence, "licable"]
79 -- , inf "inima" := single "inima" ["inima" & occurence, "ginable"]
80 -- , inf "inimi" := single "inimi" ["inimi" & occurence, "table"]
81 -- , inf "inimp" := single "inimp" ["inimp" & occurence, "ortant"]
82 -- , inf "ininf" := single "inɛ̃" ["ininf" & occurence, "lammation"]
83 -- , inf "inint" := single "inɛ̃t" ["inint" & occurence, "éressant"]
84 -- , inf "inn" := single "in" ["inn" & occurence, "ocent"]
85 -- , inf "ino" := single "ino" ["ino" & occurence, "dore"]
86 -- , inf "inu" := single "inu" ["inu" & occurence, "tile"]
87 -- , inf "ix" := single "iks" ["phén", "ix" & occurence]
88 -- , inf "gine" := single "ʒin" ["an", "gine" & occurence]
89 -- , inf "era" := single "əʁa" ["s", "era" & occurence]
90 -- , inf "erai" := single "əʁɛ" ["s", "erai" & occurence]
91 , inf "era" :=
92 multi
93 [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "əʁ", lexemeExample = ["mer" & occurence, "ingue"]}
94 , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛʁ", lexemeExample = ["mer" & occurence, "le"]}
95 ]
96 , inf "erin" := single "əʁɛ̃" ["m", "erin" & occurence, "gue"]
97 , inf "s" :=
98 multi
99 [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["ti", "s" & occurence, "ane"]}
100 , [PronContextBeforeAnySemiVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["pari", "s" & occurence, "ienne"]}
101 , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "s", lexemeExample = ["s", "a" & occurence]}
102 ]
103 , -- , inf "shirt" := single "ʃœʁt" ["shirt" & occurence]
104 , -- , inf "teau" := single "to" ["teau" & occurence]
105 -- , inf "ti" := single "ti" ["ti" & occurence, "gre"]
106 , pre "ukulele" := single "jukulele" ["ukulele" & occurence]
107 , -- , inf "ui" := single "ɥi" ["ling", "ui" & occurence, "stique"]
108 , pre "abrivent" := single "abʁivɑ̃" ["abrivent" & occurence]
109 , pre "adjuvent" := single "adʒyvɑ̃" ["adjuvent" & occurence]
110 , pre "antivent" := single "ɑ̃tivɑ̃" ["antivent" & occurence]
111 , pre "auvent" := single "ovɑ̃" ["auvent" & occurence]
112 , pre "avent" := single "avɑ̃" ["avent" & occurence]
113 , pre "aï" := single "ai" ["c", "aï" & occurence, "man"]
114 , pre "bavent" := single "bavɑ̃" ["bavent" & occurence]
115 , pre "boutavent" := single "butavɑ̃" ["boutavent" & occurence]
116 , pre "bouvent" := single "buvɑ̃" ["bouvent" & occurence]
117 , pre "bénévent" := single "benevɑ̃" ["bénévent" & occurence]
118 , pre "connivent" := single "konivɑ̃" ["connivenr" & occurence]
119 , pre "contrevent" := single "kɔ̃tʁəvɑ̃" ["contrevent" & occurence]
120 , pre "convent" := single "kɔ̃vɑ̃" ["convent" & occurence]
121 , pre "couvent" := single "kuvɑ̃" ["couvent" & occurence]
122 , pre "couvents" := single "kuvɑ̃" ["couvents" & occurence]
123 , pre "engoulevent" := single "ɑ̃gulvɑ̃" ["engoulevent" & occurence]
124 , pre "fervent" := single "fɛʁvɑ̃" ["fervent" & occurence]
125 , pre "frévent" := single "fʁevɑ̃" ["frévent" & occurence]
126 , pre "heurtevent" := single "œʁtəvɑ̃" ["heurtevent" & occurence]
127 , pre "her" := single "ɛʁ" ["her" & occurence, "be"]
128 , pre "hi" := single "i" ["hi" & occurence, "ver"]
129 , pre "ill" := single "il" ["ill" & occurence, "étrisme"]
130 , pre "jack" := single "dʒak" ["jack" & occurence]
131 , pre "jazz" := single "dʒaz" ["jazz" & occurence]
132 , pre "montsurvent" := single "mɔ̃syʁvɑ̃" ["montsurvent" & occurence]
133 , -- , pre "ni" := single "ni" ["ni" & occurence, "ais"]
134 pre "niai" := single "njɛ" ["niai" & occurence, "s"]
135 , pre "paravent" := single "paʁavɑ̃" ["paravent" & occurence]
136 , pre "sen" :=
137 multi
138 [ [PronContextBeforeAny] := LexemePron{lexemePronunciation = "sɑ̃", lexemeExample = ["sen" & occurence, "t"]}
139 , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "sen", lexemeExample = ["sen" & occurence, "a"]}
140 -- , inf "a" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "a"]}
141 -- , inf "e" := LexemePron{lexemePronunciation="sən", lexemeExample=["sen" & occurence, "estre"]}
142 -- , inf "i" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "ior"]}
143 -- , inf "o" := LexemePron{lexemePronunciation="sen", lexemeExample=["Sen" & occurence, "oueix"]}
144 -- , inf "u" := LexemePron{lexemePronunciation="sen", lexemeExample=[]}
145 ]
146 , pre "souvent" := single "suvɑ̃" ["souvent" & occurence]
147 , pre "taillevent" := single "tɑjvɑ̃" ["taillevent" & occurence]
148 , pre "tournevent" := single "tuʁnəvɑ̃" ["tournevent" & occurence]
149 , pre "vent" := single "vɑ̃" ["vent" & occurence]
150 , pre "virevent" := single "viʁvɑ̃" ["virevent" & occurence]
151 , pre "volvent" := single "vɔlvɑ̃" ["volvent" & occurence]
152 , pre "évent" := single "evɑ̃" ["évent" & occurence]
153 , pre "œ" := single "œ" ["œ" & occurence, "uf"]
154 , pre "fier" := single "fjɛʁ" ["fier" & occurence]
155 , pre "tier" := single "tjɛʁ" ["tier" & occurence]
156
157 ]
158 & list
159 & foldMap (\(ks, v) -> [(k, v) | k <- ks])
160 & mapFromListCheckingDuplicates
161 where
162 single pron exs =
163 Map.singleton PronContextBeforeAny $ LexemePron{lexemePronunciation = pron, lexemeExample = exs}
164 multi l =
165 l
166 & list
167 & foldMap (\(ks, v) -> [(k, v) | k <- ks])
168 & mapFromListCheckingDuplicates
169 -}
170
171 spec :: HasCallStack => Spec
172 spec = do
173 -- Pron.lexerRender $ French.pronunciationDict
174 describe "pronunciation" do
175 -- describe "decomposition" do
176 -- forM_ sentences \sentence ->
177 -- Pron.pronunciationDecompositionTest French.pronunciationDict $ sentence & Text.unpack
178 -- describe "result" do
179 -- forM_ sentences \sentence ->
180 -- Pron.pronunciationResultTest French.pronunciationDict $ sentence & Text.unpack
181 describe "parser" do
182 forM_ sentences \sentence ->
183 Pron.pronunciationParserTest French.pronunciationRules sentence
184
185 {-
186 withoutRetries do
187 withoutTimeout do
188 around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do
189 outDB <- goldenPath "wiktionary" "sqlite"
190 outDBExists <- IO.doesFileExist outDB & liftIO
191 when (outDBExists) do
192 IO.removeFile outDB & liftIO
193 aroundWith
194 ( \k wiktConn -> do
195 SQL.withConnection outDB \outConn ->
196 k (wiktConn, outConn)
197 )
198 do
199 it "check-against-wiktionary" \(wiktConn, outConn) -> do
200 SQL.execute_ outConn $ "PRAGMA journal_mode = OFF"
201 SQL.execute_ outConn $ "PRAGMA synchronous = OFF"
202 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)"
203 SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);"
204 SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);"
205 SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);"
206 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
207 SQL.fold
208 wiktConn
209 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000"
210 ()
211 (1 :: Int)
212 \ !i
213 ( exprId :: Int
214 , maybeWord :: Maybe Text
215 , sounds :: [Wiktionary.Sound]
216 ) -> do
217 case maybeWord of
218 Nothing -> return ()
219 Just exprLit ->
220 case exprLit & Pron.run French.pronunciationRules of
221 Left err -> do
222 SQL.execute
223 outConn
224 "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
225 ( exprId
226 , exprLit
227 , err & pShowNoColor
228 )
229 Right lexemes -> do
230 let synthExplain = lexemes <&> (\Pron.Pron{pronInput, pronRule} -> Pron.chars pronInput <> " → " <> show (Pron.rulePron pronRule)) & List.intercalate "; "
231 let synthBroad = lexemes & foldMap (Pron.pronRule >>> Pron.rulePron >>> Pron.pronunciationText)
232 forM_ sounds \Wiktionary.Sound{..} -> do
233 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
234 case sound_ipa of
235 Just (IPA.IPAPhons exprIPAs) -> do
236 forM_ exprIPAs \case
237 IPA.IPAPhonemic exprBroad -> do
238 when (i `mod` 5000 == 0) do
239 IO.hPrint IO.stderr (i, exprLit, exprBroad)
240 SQL.execute
241 outConn
242 "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);"
243 ( exprLit
244 , exprBroad & IPA.ipaWordsToText Nothing
245 , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing
246 , synthBroad
247 , synthExplain
248 ) -- & traceShowId
249 IPA.IPAPhonError (_errMsg, _err) -> return ()
250 _ -> return ()
251 _ -> return ()
252 return (i + 1)
253 return @IO ()
254 -}
255
256 {-
257 mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]]
258 mangleSupraSegmentalFeatures = \case
259 [] -> []
260 IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts
261 IPA.WithSuprasegmentalFeature IPA.Linking syl : ts ->
262 case mangleSupraSegmentalFeatures ts of
263 [] -> [dropSupraSegmentalFeatures syl]
264 x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs
265 IPA.WithSuprasegmentalFeature _feat syl : xs ->
266 mangleSupraSegmentalFeatures (syl : xs)
267 -}
268
269 -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML
270
271 sentences :: Set [ExampleLiteral]
272 sentences =
273 [
274 [ ["aiguille"]
275 , ["ail"]
276 , ["aimer"]
277 , ["ampoules"]
278 , ["couvent"{exampleLiteralMeaning = "couver"}]
279 , ["couvent"{exampleLiteralMeaning = "monastère"}]
280 , ["papillon"]
281 ]
282 , French.pronunciationRules
283 & Pron.examples
284 ]
285 & mconcat