1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
4 module Language.FrenchSpec where
6 import Data.ByteString.Builder qualified as ByteString.Builder
7 import Data.GenValidity.Map ()
8 import Data.GenValidity.Sequence ()
9 import Data.GenValidity.Set ()
10 import Data.GenValidity.Text ()
11 import Data.List qualified as List
12 import Data.Map.Strict qualified as Map
13 import Data.Set qualified as Set
14 import Data.Text qualified as Text
15 import Data.Text.Encoding qualified as Text
16 import Data.Text.Lazy.Encoding qualified as Text.Lazy
17 import Data.Text.Short qualified as ShortText
18 import Data.Validity.Map ()
19 import Data.Validity.Set ()
20 import Data.Validity.Text ()
22 import Language.French qualified as French
23 import Language.Pronunciation (ExampleLiteral (..), Lexeme (..), Rule (..), RuleLexemes, after, before, begining, ending, meaning, occurence, rule, silent, word)
24 import Language.Pronunciation qualified as Pron
25 import Paths_worksheets qualified as Self
26 import System.Directory qualified as IO
27 import System.FilePath (joinPath, pathSeparator, (<.>), (</>))
28 import System.FilePath.Posix qualified as File
29 import System.IO qualified as IO
31 import Text.Blaze.Html5.Attributes qualified as HA
32 import Utils.Pronunciation qualified as Pron
34 import Wiktionary qualified
35 import Worksheets.Utils.HTML (Html, className, classes, cm, styles, (!))
36 import Worksheets.Utils.HTML qualified as HTML
37 import Worksheets.Utils.IPA qualified as IPA
38 import Worksheets.Utils.Paper qualified as Paper
39 import Worksheets.Utils.Prelude
40 import Worksheets.Utils.SQL qualified as SQL
41 import Prelude (error, even, mod)
44 instance IsString Rule where
45 fromString s = rule {ruleLexemes = fromString s}
46 [ ful "de" := single "də" ["de" & occurence]
47 , inf "'" := single "" []
48 , inf "-" := single "" []
49 , inf "," := single "" []
50 , inf "?" := single "" []
51 , inf "!" := single "" []
52 , inf borderLeftText := single "" []
53 , inf borderRightText := single "" []
55 , inf "demi" := single "dəmi" ["demi" & occurence]
56 , -- , inf "amb" := single "ɑ̃b" ["amb" & occurence, "ulance"]
57 -- , inf "amp" := single "ɑ̃p" ["amp" & occurence, "oule"]
58 -- , inf "amph" := single "ɑ̃f" ["amph" & occurence, "ithéâtre"]
59 -- , inf "an" := multi
60 -- [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["pl", "an" & occurence]}
61 -- , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation="an", lexemeExample=["an" & occurence, "imal"]}
62 -- , [PronContextBeforeAny] := LexemePron{lexemePronunciation="ɑ̃", lexemeExample=["an" & occurence, "tilope"]}
64 , -- , inf "coin" := single "kwɛ̃" ["coin" & occurence, "cer"]
68 -- [ [PronContextBeforeBorder] := LexemePron{lexemePronunciation = "", lexemeExample = }
69 -- , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "im", lexemeExample = ["im" & occurence, "ortel"]}
70 -- , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛ̃", lexemeExample = ["im" & occurence, "primer"]}
72 , -- , inf "in" := single "ɛ̃" ["merl", "in" & occurence]
73 -- , inf "ina" := single "ina" ["ord", "ina" & occurence, "teur"]
74 -- , inf "ine" := single "in" ["rout", "ine" & occurence]
75 -- , inf "inent" := single "in" ["dess", "inent" & occurence]
76 -- , inf "iner" := single "inɛʁ" ["iner" & occurence, "tie"]
77 -- , inf "inex" := single "inɛgz" ["inex" & occurence, "istant"]
78 -- , inf "inexp" := single "inɛksp" ["inexp" & occurence, "licable"]
79 -- , inf "inima" := single "inima" ["inima" & occurence, "ginable"]
80 -- , inf "inimi" := single "inimi" ["inimi" & occurence, "table"]
81 -- , inf "inimp" := single "inimp" ["inimp" & occurence, "ortant"]
82 -- , inf "ininf" := single "inɛ̃" ["ininf" & occurence, "lammation"]
83 -- , inf "inint" := single "inɛ̃t" ["inint" & occurence, "éressant"]
84 -- , inf "inn" := single "in" ["inn" & occurence, "ocent"]
85 -- , inf "ino" := single "ino" ["ino" & occurence, "dore"]
86 -- , inf "inu" := single "inu" ["inu" & occurence, "tile"]
87 -- , inf "ix" := single "iks" ["phén", "ix" & occurence]
88 -- , inf "gine" := single "ʒin" ["an", "gine" & occurence]
89 -- , inf "era" := single "əʁa" ["s", "era" & occurence]
90 -- , inf "erai" := single "əʁɛ" ["s", "erai" & occurence]
93 [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "əʁ", lexemeExample = ["mer" & occurence, "ingue"]}
94 , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "ɛʁ", lexemeExample = ["mer" & occurence, "le"]}
96 , inf "erin" := single "əʁɛ̃" ["m", "erin" & occurence, "gue"]
99 [ [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["ti", "s" & occurence, "ane"]}
100 , [PronContextBeforeAnySemiVowel] := LexemePron{lexemePronunciation = "z", lexemeExample = ["pari", "s" & occurence, "ienne"]}
101 , [PronContextBeforeAny] := LexemePron{lexemePronunciation = "s", lexemeExample = ["s", "a" & occurence]}
103 , -- , inf "shirt" := single "ʃœʁt" ["shirt" & occurence]
104 , -- , inf "teau" := single "to" ["teau" & occurence]
105 -- , inf "ti" := single "ti" ["ti" & occurence, "gre"]
106 , pre "ukulele" := single "jukulele" ["ukulele" & occurence]
107 , -- , inf "ui" := single "ɥi" ["ling", "ui" & occurence, "stique"]
108 , pre "abrivent" := single "abʁivɑ̃" ["abrivent" & occurence]
109 , pre "adjuvent" := single "adʒyvɑ̃" ["adjuvent" & occurence]
110 , pre "antivent" := single "ɑ̃tivɑ̃" ["antivent" & occurence]
111 , pre "auvent" := single "ovɑ̃" ["auvent" & occurence]
112 , pre "avent" := single "avɑ̃" ["avent" & occurence]
113 , pre "aï" := single "ai" ["c", "aï" & occurence, "man"]
114 , pre "bavent" := single "bavɑ̃" ["bavent" & occurence]
115 , pre "boutavent" := single "butavɑ̃" ["boutavent" & occurence]
116 , pre "bouvent" := single "buvɑ̃" ["bouvent" & occurence]
117 , pre "bénévent" := single "benevɑ̃" ["bénévent" & occurence]
118 , pre "connivent" := single "konivɑ̃" ["connivenr" & occurence]
119 , pre "contrevent" := single "kɔ̃tʁəvɑ̃" ["contrevent" & occurence]
120 , pre "convent" := single "kɔ̃vɑ̃" ["convent" & occurence]
121 , pre "couvent" := single "kuvɑ̃" ["couvent" & occurence]
122 , pre "couvents" := single "kuvɑ̃" ["couvents" & occurence]
123 , pre "engoulevent" := single "ɑ̃gulvɑ̃" ["engoulevent" & occurence]
124 , pre "fervent" := single "fɛʁvɑ̃" ["fervent" & occurence]
125 , pre "frévent" := single "fʁevɑ̃" ["frévent" & occurence]
126 , pre "heurtevent" := single "œʁtəvɑ̃" ["heurtevent" & occurence]
127 , pre "her" := single "ɛʁ" ["her" & occurence, "be"]
128 , pre "hi" := single "i" ["hi" & occurence, "ver"]
129 , pre "ill" := single "il" ["ill" & occurence, "étrisme"]
130 , pre "jack" := single "dʒak" ["jack" & occurence]
131 , pre "jazz" := single "dʒaz" ["jazz" & occurence]
132 , pre "montsurvent" := single "mɔ̃syʁvɑ̃" ["montsurvent" & occurence]
133 , -- , pre "ni" := single "ni" ["ni" & occurence, "ais"]
134 pre "niai" := single "njɛ" ["niai" & occurence, "s"]
135 , pre "paravent" := single "paʁavɑ̃" ["paravent" & occurence]
138 [ [PronContextBeforeAny] := LexemePron{lexemePronunciation = "sɑ̃", lexemeExample = ["sen" & occurence, "t"]}
139 , [PronContextBeforeAnyVowel] := LexemePron{lexemePronunciation = "sen", lexemeExample = ["sen" & occurence, "a"]}
140 -- , inf "a" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "a"]}
141 -- , inf "e" := LexemePron{lexemePronunciation="sən", lexemeExample=["sen" & occurence, "estre"]}
142 -- , inf "i" := LexemePron{lexemePronunciation="sen", lexemeExample=["sen" & occurence, "ior"]}
143 -- , inf "o" := LexemePron{lexemePronunciation="sen", lexemeExample=["Sen" & occurence, "oueix"]}
144 -- , inf "u" := LexemePron{lexemePronunciation="sen", lexemeExample=[]}
146 , pre "souvent" := single "suvɑ̃" ["souvent" & occurence]
147 , pre "taillevent" := single "tɑjvɑ̃" ["taillevent" & occurence]
148 , pre "tournevent" := single "tuʁnəvɑ̃" ["tournevent" & occurence]
149 , pre "vent" := single "vɑ̃" ["vent" & occurence]
150 , pre "virevent" := single "viʁvɑ̃" ["virevent" & occurence]
151 , pre "volvent" := single "vɔlvɑ̃" ["volvent" & occurence]
152 , pre "évent" := single "evɑ̃" ["évent" & occurence]
153 , pre "œ" := single "œ" ["œ" & occurence, "uf"]
154 , pre "fier" := single "fjɛʁ" ["fier" & occurence]
155 , pre "tier" := single "tjɛʁ" ["tier" & occurence]
159 & foldMap (\(ks, v) -> [(k, v) | k <- ks])
160 & mapFromListCheckingDuplicates
163 Map.singleton PronContextBeforeAny $ LexemePron{lexemePronunciation = pron, lexemeExample = exs}
167 & foldMap (\(ks, v) -> [(k, v) | k <- ks])
168 & mapFromListCheckingDuplicates
171 spec :: HasCallStack => Spec
173 -- Pron.lexerRender $ French.pronunciationDict
174 describe "pronunciation" do
175 -- describe "decomposition" do
176 -- forM_ sentences \sentence ->
177 -- Pron.pronunciationDecompositionTest French.pronunciationDict $ sentence & Text.unpack
178 -- describe "result" do
179 -- forM_ sentences \sentence ->
180 -- Pron.pronunciationResultTest French.pronunciationDict $ sentence & Text.unpack
182 forM_ sentences \sentence ->
183 Pron.pronunciationParserTest French.pronunciationRules sentence
188 around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do
189 outDB <- goldenPath "wiktionary" "sqlite"
190 outDBExists <- IO.doesFileExist outDB & liftIO
191 when (outDBExists) do
192 IO.removeFile outDB & liftIO
195 SQL.withConnection outDB \outConn ->
196 k (wiktConn, outConn)
199 it "check-against-wiktionary" \(wiktConn, outConn) -> do
200 SQL.execute_ outConn $ "PRAGMA journal_mode = OFF"
201 SQL.execute_ outConn $ "PRAGMA synchronous = OFF"
202 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS results (exprLit TEXT NON NULL, wiktBroad TEXT NON NULL, wiktBroadSegments TEXT NON NULL, synthBroad TEXT NON NULL, synthExplain TEXT NON NULL)"
203 SQL.execute_ outConn $ "CREATE INDEX results__exprLit ON results (exprLit);"
204 SQL.execute_ outConn $ "CREATE INDEX results__wiktBroad ON results (wiktBroad);"
205 SQL.execute_ outConn $ "CREATE INDEX results__synthBroad ON results (synthBroad);"
206 SQL.execute_ outConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
209 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr' LIMIT 1000"
214 , maybeWord :: Maybe Text
215 , sounds :: [Wiktionary.Sound]
220 case exprLit & Pron.run French.pronunciationRules of
224 "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
230 let synthExplain = lexemes <&> (\Pron.Pron{pronInput, pronRule} -> Pron.chars pronInput <> " → " <> show (Pron.rulePron pronRule)) & List.intercalate "; "
231 let synthBroad = lexemes & foldMap (Pron.pronRule >>> Pron.rulePron >>> Pron.pronunciationText)
232 forM_ sounds \Wiktionary.Sound{..} -> do
233 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
235 Just (IPA.IPAPhons exprIPAs) -> do
237 IPA.IPAPhonemic exprBroad -> do
238 when (i `mod` 5000 == 0) do
239 IO.hPrint IO.stderr (i, exprLit, exprBroad)
242 "INSERT INTO results(exprLit, wiktBroad, wiktBroadSegments, synthBroad, synthExplain) VALUES(?,?,?,?,?);"
244 , exprBroad & IPA.ipaWordsToText Nothing
245 , exprBroad & IPA.dropSupraSegmentalFeatures & IPA.ipaWordsToText Nothing
249 IPA.IPAPhonError (_errMsg, _err) -> return ()
257 mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]]
258 mangleSupraSegmentalFeatures = \case
260 IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts
261 IPA.WithSuprasegmentalFeature IPA.Linking syl : ts ->
262 case mangleSupraSegmentalFeatures ts of
263 [] -> [dropSupraSegmentalFeatures syl]
264 x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs
265 IPA.WithSuprasegmentalFeature _feat syl : xs ->
266 mangleSupraSegmentalFeatures (syl : xs)
269 -- cardsHtml "syllabes" $ French.syllablesTable & French.syllablesTableToHTML
271 sentences :: Set [ExampleLiteral]
278 , ["couvent"{exampleLiteralMeaning = "couver"}]
279 , ["couvent"{exampleLiteralMeaning = "monastère"}]
282 , French.pronunciationRules