]> Git — Sourcephile - julm/worksheets.git/blob - tests/WiktionarySpec.hs
wip
[julm/worksheets.git] / tests / WiktionarySpec.hs
1 {-# LANGUAGE AllowAmbiguousTypes #-}
2 {-# LANGUAGE OverloadedLists #-}
3 {-# LANGUAGE ParallelListComp #-}
4
5 module WiktionarySpec where
6
7 import Data.GenValidity.Map ()
8 import Data.GenValidity.Sequence ()
9 import Data.GenValidity.Set ()
10 import Data.GenValidity.Text ()
11 import Data.List qualified as List
12 import Data.Map.Strict qualified as Map
13 import Data.Set qualified as Set
14 import Data.Text qualified as Text
15 import Data.Text.Short qualified as ShortText
16 import Data.Validity.Map ()
17 import Data.Validity.Set ()
18 import Data.Validity.Text ()
19 import Language.IPA qualified as IPA
20 import System.Directory qualified as IO
21 import System.FilePath (joinPath, pathSeparator, (<.>), (</>))
22 import System.IO qualified as IO
23 import Test.Syd
24
25 import Data.RadixTree.Word8.Strict qualified as RadixTree
26 import Utils.Tests
27 import Wiktionary qualified
28 import Worksheets.Utils.IPA qualified as IPA
29 import Worksheets.Utils.JSON qualified as JSON
30 import Worksheets.Utils.Prelude
31 import Worksheets.Utils.SQL qualified as SQL
32 import Prelude (mod)
33
34 {-
35 wiktionaryQueryWord :: LangCode -> ShortText -> IO [Wiktionary]
36 wiktionaryQueryWord lang_code word =
37 SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" \conn ->
38 SQL.query conn ("SELECT " <> SQL.selectors @Wiktionary <> " FROM wiktionary WHERE word = ? AND lang_code = ?") (word, lang_code)
39
40 wiktionaryQueryAll :: IO [Wiktionary]
41 wiktionaryQueryAll =
42
43 test = testAll
44
45 testAll = wiktionaryQueryAll <&> List.length
46
47 testError0 :: IO [Wiktionary]
48 testError0 =
49 SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" \conn ->
50 SQL.query conn ("SELECT " <> SQL.selectors @Wiktionary <> " FROM wiktionary WHERE redirect = ?") (SQL.Only ("quelqu\8217un"::Text))
51 -}
52
53 -- | Join all `IPA.Segment` containing a `IPA.Linking` feature
54 -- with any following `IPA.Segment`.
55 -- And drop all other `IPA.SuprasegmentalFeature`.
56 mangleSupraSegmentalFeatures :: [IPA.Syllable []] -> [[IPA.Segment]]
57 mangleSupraSegmentalFeatures = \case
58 [] -> []
59 IPA.Syllable syl : ts -> syl : mangleSupraSegmentalFeatures ts
60 IPA.WithSuprasegmentalFeature IPA.Linking syl : ts ->
61 case mangleSupraSegmentalFeatures ts of
62 [] -> [dropSupraSegmentalFeatures syl]
63 x : xs -> (dropSupraSegmentalFeatures syl <> x) : xs
64 IPA.WithSuprasegmentalFeature _feat syl : xs ->
65 mangleSupraSegmentalFeatures (syl : xs)
66
67 type Support = Int
68
69 -- A frequent itemset is closed,
70 -- when no (immediate) superset has the same support.
71
72 supports :: Set Text -> Map Text (Sum Int)
73 supports corpus =
74 [ (prefix, Set.size words & Sum)
75 | (prefix, words) <- prefixToWords & Map.toList
76 ]
77 & Map.fromListWith (<>)
78 where
79 len = corpus & toList <&> Text.length & maximum
80 prefixToWords =
81 [ (prefix, Set.singleton word)
82 | word <- corpus & toList
83 , prefixLen <- [1 .. len]
84 , let prefix = word & Text.take prefixLen
85 ]
86 & Map.fromListWith (<>)
87
88 {-
89 closedForms (prefix, "") corpus = [(prefix, corpus & Set.size)]
90 closedForms (prefix, suffix) corpus =
91 let newPrefix = prefix <> ShortText.take 1 suffix in
92 let newSuffix = suffix & ShortText.drop 1 in
93 let corpusSize = corpus & Set.size in
94 let corpusWithLongerPrefix =
95 corpus
96 & Set.filter (\t -> t & ShortText.isPrefixOf newPrefix) in
97 if corpusSize == Set.size corpusWithLongerPrefix
98 then closedForms corpus (newPrefix, newSuffix)
99 else
100 (prefix, corpusSize)
101 : closedForms corpusWithLongerPrefix (newPrefix, newSuffix)
102 -}
103 dropSupraSegmentalFeatures = \case
104 IPA.Syllable syl -> syl
105 IPA.WithSuprasegmentalFeature _feat syl -> dropSupraSegmentalFeatures syl
106
107 lcp :: [Text] -> Text
108 lcp [] = []
109 -- use foldr1 to apply to the non-empty strings list given
110 lcp lines = List.foldr1 mapCommonPrefix lines
111 where
112 mapCommonPrefix {-new-} :: Text {-acc-} -> Text -> Text
113 mapCommonPrefix len1 len2 =
114 Text.pack $
115 List.map fst $
116 List.takeWhile (uncurry (==)) $
117 Text.zip len1 len2
118
119 spec = do
120 withoutTimeout do
121 withoutRetries do
122 describe "Wiktionary" do
123 describe "fr" do
124 describe "lang" do
125 describe "fr" do
126 around (\k -> SQL.withConnection "data/langs/français/kaikki/wiktionary=fr.lang=fr.sqlite3" k) do
127 -- runWiktionaryQuery @(SQL.Only [Wiktionary.Sound])
128 -- "sounds"
129 -- "SELECT sounds FROM wiktionary WHERE word=?"
130 -- (SQL.Only ("exercice" :: Text))
131 describe "IPA" do
132 -- do
133 -- forM (["e", "ex", "exercice"] & list) \word -> do
134 -- outPath <- goldenPath $ word
135 -- it word \conn -> do
136 -- res <- testIPAs conn (word & ShortText.pack)
137 -- return $ goldenPrettyShowInstance outPath res
138 let ipasDB = "data/langs/français/kaikki/wiktionary=fr.lang=fr.ipas.sqlite3"
139 ipasDBExists <- IO.doesFileExist ipasDB & liftIO
140 aroundWith
141 ( \k wiktConn -> do
142 SQL.withConnection ipasDB \ipasConn ->
143 k (wiktConn, ipasConn)
144 )
145 do
146 withoutTimeout do
147 unless (ipasDBExists) do
148 it "can-create-ipas" \(wiktConn, ipasConn) -> do
149 SQL.execute_ ipasConn $ "PRAGMA journal_mode = OFF"
150 SQL.execute_ ipasConn $ "PRAGMA synchronous = OFF"
151 -- SQL.execute_ ipasConn $ "PRAGMA mmap_size = " <> fromString (show (256 * 1024 * 1024 :: Int))
152 -- SQL.execute_ ipasConn $ "PRAGMA page_size = " <> fromString (show (512 * 8 * 2 :: Int))
153 -- SQL.execute_ ipasConn $ "VACUUM"
154 SQL.execute_ ipasConn $ "CREATE TABLE IF NOT EXISTS broad_to_words (broad TEXT NON NULL, broads TEXT NON NULL, word TEXT NON NULL, wordID INTEGER NON NULL, UNIQUE (broad, word))"
155 -- SQL.execute_ ipasConn $ "CREATE TABLE IF NOT EXISTS ipas_broad (wordID INTEGER NON NULL, broad1 TEXT NON NULL, word TEXT NON NULL, broad TEXT NON NULL, UNIQUE(broad, word), UNIQUE(broad1, word))"
156 -- SQL.execute_ ipasConn $ "CREATE TABLE IF NOT EXISTS ipas_narrow (wordID INTEGER NON NULL, narrow1 TEXT NON NULL, word TEXT NON NULL, narrow TEXT NON NULL, UNIQUE(narrow, word), UNIQUE(narrow1, word))"
157 -- SQL.execute_ ipasConn $ "CREATE TABLE IF NOT EXISTS ipas_error (wordID INTEGER NON NULL, error TEXT NON NULL, word TEXT NON NULL)"
158 SQL.execute_ ipasConn "CREATE INDEX broad_to_words__broad ON broad_to_words (broad);"
159 SQL.execute_ ipasConn "CREATE INDEX broad_to_words__broad_word ON broad_to_words (broad, word);"
160 -- SQL.execute_ ipasConn "CREATE INDEX ipas_broad1 ON ipas_broad (broad1);"
161 -- SQL.execute_ ipasConn "CREATE INDEX ipas_narrow1 ON ipas_narrow (narrow1);"
162 -- SQL.execute_ ipasConn "DELETE FROM ipas"
163 SQL.fold
164 wiktConn
165 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr'"
166 ()
167 (1 :: Int)
168 \ !i
169 ( (wordID :: Int)
170 , maybeWord
171 , (sounds :: [Wiktionary.Sound])
172 ) -> do
173 -- iMax ~= 2075000
174 forM_ sounds \Wiktionary.Sound{..} -> do
175 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
176 case (maybeWord, sound_ipa) of
177 (Just word, Just (IPA.IPAPhons ipas)) -> do
178 forM_ ipas \case
179 IPA.IPAPhonemic ipaSyllables -> do
180 let dropTonicAccent = ShortText.toText >>> Text.replace "'" ""
181 when (i `mod` 5000 == 0) do
182 IO.hPrint
183 IO.stderr
184 ( i
185 , word
186 , show (List.length ipaSyllables)
187 , ipaSyllables & IPA.ipaWordsToText (Just IPA.Phonemic)
188 )
189
190 SQL.executeMany
191 ipasConn
192 "INSERT OR IGNORE INTO broad_to_words (broad, broads, word, wordID) VALUES (?,?,?,?)"
193 [ ( ipaSyl & dropSupraSegmentalFeatures & IPA.Syllable & IPA.toIPA_ & IPA.unIPA
194 , ipaSyllables & IPA.ipaWordsToText Nothing
195 , (word :: ShortText)
196 , wordID
197 )
198 | ipaWord :: NonEmpty (IPA.Syllable []) <- ipaSyllables & toList -- & foldMap (fmap IPA.toIPA_ >>> toList)
199 , ipaSyl :: IPA.Syllable [] <- ipaWord & toList
200 ]
201 -- SQL.execute
202 -- ipasConn
203 -- "INSERT OR IGNORE INTO ipas_broad (wordID, broad1, word, broad) VALUES (?,?,?,?)"
204 -- ( wordID
205 -- , ipaSyllables & nonEmptyHead <&> IPA.toIPA_ & nonEmptyHead & IPA.unIPA
206 -- , (word :: ShortText)
207 -- , ipaSyllables & Wiktionary.ipaWordsToText IPA.Phonemic
208 -- )
209 IPA.IPAPhonError (t, _err) -> do
210 return ()
211 -- SQL.execute
212 -- ipasConn
213 -- "INSERT INTO ipas_error (word, error) VALUES (?,?)"
214 -- ( (word :: ShortText)
215 -- , t
216 -- )
217 _ -> return ()
218 _ -> return ()
219 return (i + 1)
220 return @IO ()
221 let ngramsDB = "data/langs/français/kaikki/wiktionary=fr.lang=fr.ngrams.sqlite3"
222 ngramsDBExists <- IO.doesFileExist ngramsDB & liftIO
223 aroundWith
224 ( \k wiktConn -> do
225 SQL.withConnection ngramsDB \ngramsConn ->
226 k (wiktConn, ngramsConn)
227 )
228 do
229 withoutTimeout do
230 unless (ngramsDBExists) do
231 it "can-compute-ngrams" \(wiktConn, ngramsConn) -> do
232 SQL.execute_ ngramsConn $ "PRAGMA journal_mode = OFF"
233 SQL.execute_ ngramsConn $ "PRAGMA synchronous = OFF"
234 SQL.execute_ ngramsConn $ "CREATE TABLE IF NOT EXISTS broad_to_ngrams (broad TEXT NON NULL, ngram TEXT NON NULL, count INT DEFAULT 1, UNIQUE (broad, ngram))"
235 SQL.execute_ ngramsConn "CREATE INDEX broad_to_ngrams__broad ON broad_to_ngrams (broad);"
236 SQL.execute_ ngramsConn "CREATE INDEX broad_to_ngrams__ngram ON broad_to_ngrams (ngram);"
237 -- SQL.execute_ ngramsConn "CREATE INDEX broad_to_ngrams__broad_ngram ON broad_to_ngrams (broad, ngram);"
238 SQL.execute_ ngramsConn $ "CREATE TABLE IF NOT EXISTS errors (wordID INTEGER NON NULL, word TEXT NON NULL, error TEXT NON NULL)"
239 SQL.fold
240 wiktConn
241 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr'"
242 ()
243 (1 :: Int)
244 \ !i
245 ( (wordID :: Int)
246 , maybeWord
247 , (sounds :: [Wiktionary.Sound])
248 ) -> do
249 forM_ sounds \Wiktionary.Sound{..} -> do
250 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
251 case (maybeWord, sound_ipa) of
252 (Just word, Just (IPA.IPAPhons ipas)) -> do
253 forM_ ipas \case
254 IPA.IPAPhonemic ipaSyllables -> do
255 let ngrams = word & Wiktionary.ngramsWithinLengths 1 6
256 when (i `mod` 5000 == 0) do
257 IO.hPrint IO.stderr (i, word, ngrams, ipaSyllables)
258 SQL.executeMany
259 ngramsConn
260 "INSERT INTO broad_to_ngrams(broad,ngram) VALUES(?,?) ON CONFLICT(broad,ngram) DO UPDATE SET count=broad_to_ngrams.count+1;"
261 [ ( broad
262 , ngram & Text.toLower
263 )
264 | wordIPA :: NonEmpty (IPA.Syllable []) <- ipaSyllables & toList
265 , syllableIPA :: IPA.Syllable [] <- wordIPA & toList
266 , let broad = syllableIPA & dropSupraSegmentalFeatures & IPA.Syllable & IPA.toIPA_ & IPA.unIPA
267 , ngram <- ngrams
268 ]
269 IPA.IPAPhonError (errMsg, _err) -> do
270 SQL.execute
271 ngramsConn
272 "INSERT INTO errors (wordID, word, error) VALUES (?,?,?)"
273 ( wordID
274 , word
275 , errMsg
276 )
277 _ -> return ()
278 _ -> return ()
279 return (i + 1)
280 return @IO ()
281 let b2eDB = "data/langs/français/kaikki/wiktionary=fr.lang=fr.broads_to_expr.sqlite3"
282 b2eDBExists <- IO.doesFileExist b2eDB & liftIO
283 unless b2eDBExists do
284 aroundWith (\k wiktConn -> SQL.withConnection b2eDB \b2eConn -> k (wiktConn, b2eConn)) do
285 it "can-compute-broads_to_expr" do
286 broadsToExprSQL "fr"
287 describe "en" do
288 describe "lang" do
289 describe "en" do
290 around (\k -> SQL.withConnection "data/langs/english/kaikki/wiktionary=en.lang=en.sqlite3" k) do
291 describe "IPA" do
292 let b2eDB = "data/langs/english/kaikki/wiktionary=en.lang=en.broads_to_expr.sqlite3"
293 -- IO.removeFile b2eDB & liftIO
294 b2eDBExists <- IO.doesFileExist b2eDB & liftIO
295 unless b2eDBExists do
296 aroundWith (\k wiktConn -> SQL.withConnection b2eDB \b2eConn -> k (wiktConn, b2eConn)) do
297 it "can-compute-broads_to_expr" $
298 broadsToExprSQL "en"
299
300 broadsToExprSQL :: Text -> (SQL.Connection, SQL.Connection) -> _
301 broadsToExprSQL lang_code (wiktConn, b2eConn) = do
302 SQL.execute_ b2eConn $ "PRAGMA journal_mode = OFF"
303 SQL.execute_ b2eConn $ "PRAGMA synchronous = OFF"
304 SQL.execute_ b2eConn $ "CREATE TABLE IF NOT EXISTS broads (exprLit TEXT NON NULL, exprBroad TEXT NON NULL, sylBroad TEXT NON NULL, wordPos INTEGER NON NULL, sylPos INTEGER NON NULL, wordEnd INTEGER NON NULL, sylEnd INTEGER NON NULL)"
305 SQL.execute_ b2eConn $ "CREATE INDEX broads__broad ON broads (sylBroad);"
306 SQL.execute_ b2eConn $ "CREATE INDEX broads__sylPos ON broads (sylPos);"
307 SQL.execute_ b2eConn $ "CREATE INDEX broads__exprLit ON broads (exprLit);"
308 SQL.execute_ b2eConn $ "CREATE INDEX broads__broad_and_exprLit ON broads (sylBroad, exprLit);"
309 SQL.execute_ b2eConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
310 SQL.fold
311 wiktConn
312 "SELECT id,word,sounds FROM wiktionary WHERE lang_code=?"
313 (SQL.Only lang_code)
314 (1 :: Int)
315 \ !i
316 ( exprId :: Int
317 , maybeWord :: Maybe Text
318 , sounds :: [Wiktionary.Sound]
319 ) -> do
320 forM_ (sounds) \snd@Wiktionary.Sound{..} -> do
321 -- pHPrint IO.stderr (i, exprId::Int, maybeWord, snd)
322 let ipas
323 | lang_code == "en" = sound_ipa <> sound_enpr
324 | otherwise = sound_ipa
325 case (maybeWord, ipas) of
326 (Just exprLit, Just (IPA.IPAPhons exprIPAs)) -> do
327 when (i `mod` 5000 == 0) do
328 IO.hPrint IO.stderr (i, exprLit, exprIPAs)
329 forM_ exprIPAs \case
330 IPA.IPAPhonemic exprBroad -> do
331 SQL.executeMany
332 b2eConn
333 "INSERT INTO broads(exprLit, exprBroad, sylBroad,wordPos,sylPos,wordEnd,sylEnd) VALUES(?,?,?,?,?,?,?);"
334 $ [ ( exprLit
335 , exprBroad & IPA.ipaWordsToText Nothing
336 , sylBroad
337 , wordPos
338 , sylPos
339 , wordEnd
340 , sylEnd
341 ) -- & traceShowId
342 | let wordEnd = exprBroad & List.length & \x -> x - 1
343 , (wordPos, wordIPA :: NonEmpty (IPA.Syllable [])) <-
344 exprBroad
345 & toList
346 & List.zip [0 :: Int ..]
347 , let sylEnd = wordIPA & List.length & \x -> x - 1
348 , (sylPos, syllableIPA :: [IPA.Segment]) <-
349 wordIPA
350 & toList
351 & mangleSupraSegmentalFeatures
352 & List.zip [0 :: Int ..]
353 , let sylBroad =
354 syllableIPA
355 & IPA.Syllable
356 & IPA.toIPA_
357 & IPA.unIPA
358 ]
359 IPA.IPAPhonError (errMsg, _err) -> do
360 SQL.execute
361 b2eConn
362 "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
363 ( exprId
364 , exprLit
365 , errMsg
366 )
367 _ -> return ()
368 _ -> return ()
369 return (i + 1)
370 return @IO ()
371
372 {-
373 let dbName = "broads_to_prefix"
374 let b2pDB = "data/langs/français/kaikki/wiktionary=fr.lang=fr."<>dbName<>".sqlite3"
375 b2pDBExists <- IO.doesFileExist b2pDB & liftIO
376 aroundWith
377 ( \k wiktConn -> do
378 SQL.withConnection b2pDB \b2pConn ->
379 k (wiktConn, b2pConn)
380 )
381 do
382 withoutTimeout do
383 unless (b2pDBExists) do
384 it ("can-compute-"<>dbName) \(wiktConn, b2pConn) -> do
385 SQL.execute_ b2pConn $ "PRAGMA journal_mode = OFF"
386 SQL.execute_ b2pConn $ "PRAGMA synchronous = OFF"
387 SQL.execute_ b2pConn $ "CREATE TABLE IF NOT EXISTS broads (sylBroad TEXT NON NULL, litPrefix1 TEXT NON NULL, litPrefix2 TEXT, litPrefix3 TEXT, litPrefix4 TEXT, litPrefix5 TEXT)"
388 SQL.execute_ b2pConn $ "CREATE INDEX broads__sylBroad ON broads (sylBroad);"
389 SQL.execute_ b2pConn $ "CREATE INDEX broads__litPrefix1 ON broads (litPrefix1);"
390 SQL.execute_ b2pConn $ "CREATE INDEX broads__litPrefix2 ON broads (litPrefix2);"
391 SQL.execute_ b2pConn $ "CREATE INDEX broads__litPrefix3 ON broads (litPrefix3);"
392 SQL.execute_ b2pConn $ "CREATE INDEX broads__litPrefix4 ON broads (litPrefix4);"
393 SQL.execute_ b2pConn $ "CREATE INDEX broads__litPrefix5 ON broads (litPrefix5);"
394 SQL.execute_ b2pConn $ "CREATE TABLE IF NOT EXISTS errors (exprId INTEGER NON NULL, exprLit TEXT NON NULL, error TEXT NON NULL)"
395 SQL.fold
396 wiktConn
397 "SELECT id,word,sounds FROM wiktionary WHERE lang_code='fr'"
398 ()
399 (1 :: Int)
400 \ !i
401 ( exprId :: Int
402 , maybeWord :: Maybe Text
403 , sounds :: [Wiktionary.Sound]
404 ) -> do
405 forM_ (sounds & List.take 1) \Wiktionary.Sound{..} -> do
406 -- IO.hPrint IO.stderr (i, ident::Int, maybeWord)
407 case (maybeWord, sound_ipa) of
408 (Just exprLit, Just (IPA.IPAPhons exprIPAs)) -> do
409 let exprLen = exprLit & Text.length
410 forM_ exprIPAs \case
411 IPA.IPAPhonemic exprBroads -> do
412 when (i `mod` 5000 == 0) do
413 IO.hPrint IO.stderr (i, exprLit, exprBroads)
414 SQL.executeMany
415 b2pConn
416 "INSERT INTO broads(sylBroad, litPrefix1, litPrefix2, litPrefix3, litPrefix4, litPrefix5) VALUES(?,?,?,?,?,?);"
417 $
418 [ ( sylBroad
419 , litPrefix1
420 , litPrefix2
421 , litPrefix3
422 , litPrefix4
423 , litPrefix5
424 ) -- & traceShowId
425 | let wordIPA =
426 exprBroads
427 & nonEmptyHead
428 , let syllableIPA =
429 wordIPA
430 & nonEmptyHead
431 , let sylBroad = syllableIPA
432 & IPA.toIPA_
433 & IPA.unIPA
434 , let litPrefix1 = exprLit & Text.take 1
435 , let litPrefix2 | 2 <= exprLen = Just $ exprLit & Text.take 2
436 | otherwise = Nothing
437 , let litPrefix3 | 3 <= exprLen = Just $ exprLit & Text.take 3
438 | otherwise = Nothing
439 , let litPrefix4 | 4 <= exprLen = Just $ exprLit & Text.take 4
440 | otherwise = Nothing
441 , let litPrefix5 | 5 <= exprLen = Just $ exprLit & Text.take 5
442 | otherwise = Nothing
443 ]
444 IPA.IPAPhonError (errMsg, _err) -> do
445 SQL.execute
446 b2pConn
447 "INSERT INTO errors (exprId, exprLit, error) VALUES (?,?,?)"
448 ( exprId
449 , exprLit
450 , errMsg
451 )
452 _ -> return ()
453 _ -> return ()
454 return (i + 1)
455 return @IO ()
456 -}
457
458 -- entrevue|\ɑ̃.tʁə.vy\
459 -- exercice|\ɛg.zɛʁ.sis\
460 -- exercice|\œ̃.n‿e.gzɛʁ.sis\
461 -- forM_ (["ɛg."] & list) \syllable -> do
462 -- outPath <- goldenPath $ syllable
463 -- it syllable \(wiktConn, ipasConn) -> do
464 -- res :: [SQL.Only Text] <- SQL.query
465 -- ipasConn
466 -- "SELECT word FROM ipas_broad WHERE broad1=?"
467 -- (SQL.Only ("ɛg."::Text))
468 -- let corpus = res <&> SQL.fromOnly & Set.fromList
469 -- return $ goldenPrettyShowInstance outPath $
470 -- supports corpus
471
472 {-
473 do
474 let testWord = "exercice"
475 let title = "lcp-" <> ShortText.unpack testWord
476 let db = "data/langs/français/kaikki/wiktionary=fr.lang=fr.ipas.sqlite3"
477 outPath <- goldenPath title
478 aroundWith (\k _ -> SQL.withConnection db k) do
479 it title \ipasConn -> do
480 let ipas :: [ShortText] = ["ɛɡ", "zɛʁ", "sis"]
481 -- foldlM :: (Foldable t, Monad m) => (b -> a -> m b) -> b -> t a -> m b
482 res <-
483 foldrM
484 ( \ipa acc -> do
485 corpus :: [SQL.Only ShortText] <-
486 SQL.query
487 ipasConn
488 "SELECT LOWER(word) FROM ipas WHERE broad LIKE (?||'.%') OR broad=?"
489 (ipa, ipa)
490 -- return $ [ (ipa, word) | SQL.Only word <- corpus]<>acc
491 return $ (ipa, corpus) : acc
492 )
493 []
494 ipas
495 return
496 $ goldenPrettyShowInstance outPath
497 $ [ (ipa, corpus & List.length)
498 | (ipa, corpus) <- res
499 ]
500 -}
501
502 -- closedForms (corpus & Set.fromList) ("", word)
503 -- res & List.filter (\t -> t & Text.isPrefixOf "ex" & not)
504 -- aroundWith (\k wiktDB -> k (wiktDB, "data/langs/français/kaikki/wiktionary=fr.lang=fr.ipas.sqlite3" :: FilePath)) do
505
506 {-
507 testIPAs :: SQL.Connection -> ShortText -> IO [IPA.IPA]
508 testIPAs conn word =
509 --
510 SQL.fold @(SQL.Only [Wiktionary.Sound])
511 conn
512 "SELECT sounds FROM wiktionary WHERE word=?"
513 (SQL.Only word)
514 []
515 \ !acc (SQL.Only sounds) ->
516 return (foldMap (\Wiktionary.Sound{sound_ipa} -> sound_ipa & maybeToList & foldMap Wiktionary.unIPAs) sounds List.++ acc)
517 -}
518 {-
519 runWiktionaryFold
520 "can-parse-it-all"
521 ("SELECT " <> SQL.selectors @Wiktionary.Wiktionary <> " FROM wiktionary")
522 ()
523 0
524 (\ !(acc :: Int) (_row :: Wiktionary.Wiktionary) -> return (acc + 1))
525 -}
526
527 runWiktionaryFold ::
528 forall row params outers acc.
529 SQL.ToRow params =>
530 SQL.FromRow row =>
531 Show acc =>
532 String ->
533 SQL.Query ->
534 params ->
535 acc ->
536 (acc -> row -> IO acc) ->
537 TestDefM outers FilePath ()
538 runWiktionaryFold title qry params init merge = do
539 outPath <- goldenPath title "txt"
540 aroundWith (\k db -> SQL.withConnection db k) do
541 withoutTimeout do
542 withoutRetries do
543 it title \conn -> do
544 res <- SQL.fold conn qry params init merge
545 return $ goldenPrettyShowInstance outPath res
546
547 runWiktionaryQuery ::
548 forall row params outers.
549 SQL.FromRow row =>
550 Show row =>
551 SQL.ToRow params =>
552 String ->
553 SQL.Query ->
554 params ->
555 TestDefM outers FilePath ()
556 runWiktionaryQuery title qry params = do
557 outPath <- goldenPath title "txt"
558 aroundWith (\k db -> SQL.withConnection db k) do
559 withoutRetries do
560 it title \conn -> do
561 res :: [row] <- SQL.query conn qry params
562 return $ goldenPrettyShowInstance outPath res
563
564 {-
565 wiktionaryToIPAs ::
566 forall wiktRow wiktParams outers.
567 SQL.FromRow wiktRow =>
568 Show wiktRow =>
569 SQL.ToRow wiktParams =>
570 String ->
571 SQL.Query ->
572 wiktParams ->
573 (SQL.Connection -> IO ()) ->
574 (SQL.Connection -> wiktRow -> IO ()) ->
575 TestDefM outers (FilePath, FilePath) ()
576 wiktionaryToIPAs title wiktQry wiktParams init merge = do
577 -- outPath <- goldenPath title
578 aroundWith
579 (\k (wiktDB, ipasDB) ->
580 SQL.withConnection wiktDB \wiktConn ->
581 SQL.withConnection ipasDB \ipasConn -> do
582 init ipasConn
583 k (wiktConn, ipasConn)) do
584 withoutTimeout do
585 withoutRetries do
586 it title \(wiktConn, ipasConn) -> do -- () do
587 SQL.fold wiktConn wiktQry wiktParams () \() row ->
588 merge ipasConn row
589 -}