Module:cu-pronunciation/sandbox

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Testcases

[edit]

6 of 7 tests failed. (refresh)

Status | Text | Expected | Actual
test_all:
Passed | сло̏во | s̪ɫ̪ɔ̟˦vɔ̟˨ | s̪ɫ̪ɔ̟˦vɔ̟˨
Failed | дальчьнъ | d̪ɑl̪ɪt͡ʃɪn̪ʊ̆ | dalɪt͡ʃɪnʊ̆
Failed | йгрь | jɪ̆ɡr̩ | jɪgrɪ
Failed | ѿкаꙁъ | ɔt̪ʊ̆kɑz̪ʊ̆ | ɔtʊ̆kazʊ̆
Failed | югъ | jyɡʊ̆ | juɡʊ
Failed | мєждоу | mɛ̠ʒdu | mɛʒdu
Failed | съчѧстьнъ | s̪ʊ̆t͡ʃɛ̃s̪t̪ɪn̪ʊ̆ | sʊ̆t͡ʃɛ̃stɪnʊ̆

-- Table of functions returned to callers of this module.
local export = {}

-- Shorthands for the Unicode-aware string helpers.
local U = mw.ustring.char
local rsubn = mw.ustring.gsub

-- Combining marks used in the IPA output.
local TIE, BREVE = U(0x361), U(0x306)          -- U+0361 tie bar, U+0306 breve
local DENTAL, SYLLABIC = U(0x32A), U(0x329)    -- U+032A bridge below, U+0329 vertical line below
local FRONTED, RETRACTED = U(0x31F), U(0x320)  -- U+031F plus sign below, U+0320 minus sign below
local NASAL = U(0x303)                         -- U+0303 tilde

-- Accent marks expected in the input, and the tone letters that replace them.
local GRAVE, D_GRAVE = U(0x300), U(0x30F)      -- U+0300 grave, U+030F double grave
local HIGH_TONE, LOW_TONE = U(0x2E6), U(0x2E8) -- U+02E6 high tone bar, U+02E8 low tone bar

-- Character lists that are spliced into "[...]" pattern classes.
local vowels = "aæɛiɪɔuʊɯy"
local vowels_c = "[" .. vowels .. "]"
local palatal_cons = "ʃʒjʲɲɕʑʎ"
-- The consonant list also carries the tie, breve and dental marks.
local cons = table.concat({ "bvɡdʒzjkɫʎmnɲpɾrstfxʃɕʑ", TIE, BREVE, DENTAL })
local diacritics = table.concat({ TIE, BREVE, DENTAL, SYLLABIC, FRONTED, RETRACTED, NASAL })
local tones = HIGH_TONE .. LOW_TONE

-- Lookup table from (lower-cased) Cyrillic letters to their IPA rendering.
-- Characters without an entry are left untouched by the substitution in toIPA.
local phonetic_chars_map
do
	-- Fragments that occur in several entries.
	local d_dental = "d" .. DENTAL
	local z_dental = "z" .. DENTAL
	local s_dental = "s" .. DENTAL
	local t_dental = "t" .. DENTAL
	local e_retracted = "ɛ" .. RETRACTED
	local e_nasal = "ɛ" .. NASAL
	local o_nasal = "ɔ" .. NASAL
	local short_u = "ʊ" .. BREVE

	phonetic_chars_map = {
		["а"] = "ɑ",
		["б"] = "b",
		["в"] = "v",
		["г"] = "ɡ",
		["д"] = d_dental,
		["е"] = e_retracted,
		["є"] = e_retracted,
		["ж"] = "ʒ",
		["ѕ"] = d_dental .. TIE .. z_dental,
		["ꙃ"] = "d" .. TIE .. "z",
		["ꙁ"] = z_dental,
		["з"] = z_dental,
		["и"] = "i",
		-- the "ь" here is a yer placeholder resolved later in toIPA
		["й"] = "jь",
		["і"] = "i",
		["ї"] = "i",
		["к"] = "k",
		["л"] = "ɫ" .. DENTAL,
		["м"] = "m",
		["н"] = "n" .. DENTAL,
		["о"] = "ɔ" .. FRONTED,
		["п"] = "p",
		["р"] = "ɾ" .. DENTAL,
		["с"] = s_dental,
		["т"] = t_dental,
		["ꙋ"] = "u",
		["ф"] = "f",
		["х"] = "x",
		["ѿ"] = "ɔ" .. t_dental .. short_u,
		["ц"] = t_dental .. TIE .. s_dental,
		["ч"] = "t" .. TIE .. "ʃ",
		["ш"] = "ʃ",
		["щ"] = "ʃt",
		["ꙑ"] = "ɯ",
		["ы"] = "ɯ",
		["ѣ"] = "æ",
		["ѫ"] = o_nasal,
		["ѧ"] = e_nasal,
		["я"] = e_nasal,
		["ю"] = "ju",
		["ꙗ"] = "jɑ",
		["ѥ"] = "j" .. e_retracted,
		["ꙓ"] = "jæ",
		["ꙙ"] = "j" .. e_nasal,
		["ѩ"] = "j" .. e_nasal,
		["ѭ"] = "j" .. o_nasal,
		["ѳ"] = t_dental,
		["ѯ"] = "k" .. short_u .. s_dental,
		["ѱ"] = "p" .. short_u .. s_dental,
		["ѡ"] = "ɔ",
		["ꙉ"] = "ʒd",
		["ѻ"] = "ɔ",
		["ꙍ"] = "ɔ",
		["ѽ"] = "ɔ",
		-- "y" is an intermediate symbol; toIPA rewrites it by context
		["у"] = "y",
		["ѵ"] = "y",
		["҇"] = "ʲ",
		["҄"] = "ʲ"
	}
end

-- Substitute like rsubn(), but hand back only the resulting string; the
-- surrounding parentheses truncate the call to its first return value.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end
 
-- Pattern fragments shared by the rules in export.toIPA.
-- "ɑ" is appended to the vowel class because phonetic_chars_map emits "ɑ" for
-- "а", whereas the module-level vowel list only contains the Latin letter "a".
local vowel_class = "[" .. vowels .. "ɑ]"
local cons_class = "[" .. cons .. "]"
local diacritic_class = "[" .. diacritics .. "]"
local yer_class = "[ъьʊɪ]"

local yer_contexts, accent_clusters
do
	local C, D = cons_class, diacritic_class

	-- { cluster, follower } pairs: a yer directly followed by `cluster` and
	-- then `follower` is rewritten as a strong yer. Tried in this order.
	yer_contexts = {
		{ C .. D .. C .. D .. C .. D, yer_class },
		-- NOTE(review): unlike the other contexts this one is closed by a
		-- consonant rather than a yer; kept exactly as in the previous code.
		{ C .. D .. C .. D, C },
		{ C .. D .. C .. D, yer_class },
		{ C .. D .. C, yer_class },
		{ C .. C .. D, yer_class },
		{ C .. C, yer_class },
		{ C .. D, yer_class },
		{ C, yer_class },
	}

	-- Consonant clusters that may separate an accent mark from the next vowel.
	accent_clusters = {
		C .. D .. C .. D .. C .. D,
		C .. D .. C .. D .. C,
		C .. D .. C .. D,
		C .. D .. C,
		C .. C .. D,
		C .. C,
		C .. D,
		C,
	}
end

-- Rewrite each `yer` that matches one of yer_contexts as `strong`.
local function strengthen_yers(term, yer, strong)
	for _, context in ipairs(yer_contexts) do
		term = rsub(term, yer .. "(" .. context[1] .. ")(" .. context[2] .. ")", strong .. "%1%2")
	end
	return term
end

-- Replace `accent` by `own_tone` and append `next_tone` to the following
-- vowel, for every supported consonant cluster between the two.
local function convert_accent(term, accent, own_tone, next_tone)
	local replacement = own_tone .. "%1%2" .. next_tone
	for _, cluster in ipairs(accent_clusters) do
		-- The vowel+diacritic form must be tried first so that the tone letter
		-- is placed after the vowel's diacritic rather than before it.
		term = rsub(term, accent .. "(" .. cluster .. ")(" .. vowel_class .. diacritic_class .. ")", replacement)
		term = rsub(term, accent .. "(" .. cluster .. ")(" .. vowel_class .. ")", replacement)
	end
	return term
end

-- Convert a term written in Cyrillic into an IPA string.
--
-- term: the spelling to convert; a combining grave or double grave accent
--       after a vowel is turned into tone letters.
-- Returns the IPA transcription without surrounding brackets.
--
-- The rules are applied strictly in sequence; later rules rely on the
-- intermediate symbols ("y", "ъ", "ь", "j") left behind by earlier ones.
function export.toIPA(term)
	term = mw.ustring.toNFC(mw.ustring.lower(term))

	-- Change Cyrillic glyphs to IPA symbols
	term = rsub(term, ".", phonetic_chars_map)

	-- Change y into its actual sound value
	term = rsub(term, "ɔ" .. FRONTED .. "y", "u")
	term = rsub(term, "(" .. vowel_class .. diacritic_class .. ")y", "%1v")
	term = rsub(term, "(" .. vowel_class .. diacritic_class .. "[" .. tones .. "])y", "%1v")
	term = rsub(term, "(" .. vowel_class .. ")y", "%1v")
	term = rsub(term, "([" .. cons .. "#])y", "%1i")

	-- Change j to ʲ after consonants
	term = rsub(term, "([" .. cons .. "#])j", "%1ʲ")

	-- Palatalize vowels
	term = rsub(term, "([" .. palatal_cons .. "])ɑ", "%1æ")
	term = rsub(term, "([" .. palatal_cons .. "])u", "%1y")
	term = rsub(term, "([" .. palatal_cons .. "])ɔ" .. NASAL, "%1ɛ" .. NASAL)

	-- Palatalize dental consonants
	term = rsub(term, "ɫ" .. DENTAL .. "ʲ", "ʎ")
	term = rsub(term, "ɾ" .. DENTAL .. "ʲ", "ɾʲ")
	term = rsub(term, "n" .. DENTAL .. "ʲ", "ɲ")
	term = rsub(term, "ʒd" .. DENTAL, "ʒd")
	term = rsub(term, "ʃt" .. DENTAL, "ʃt")

	-- Handle nasal gamma in Greek loanwords
	term = rsub(term, "[nɡ]" .. DENTAL .. "?([ɡk])", "n" .. DENTAL .. "ъ%1")

	-- Raise tense yers
	term = rsub(term, "ъj", "ɯj")
	term = rsub(term, "ьj", "ij")

	-- Remove yers succeeding a syllabic liquid
	term = rsub(term, "(" .. cons_class .. ")ɾ" .. DENTAL .. "[ъь]", "%1r" .. SYLLABIC)
	term = rsub(term, "(" .. cons_class .. ")ɫ" .. DENTAL .. "[ъь]", "%1ɫ" .. SYLLABIC)

	-- Allophone of ɫ before front vowels
	term = rsub(term, "ɫ" .. DENTAL .. "([iьɛæ])", "l" .. DENTAL .. "%1")

	-- Change strong yers into IPA (front yer first, then back yer) ...
	term = strengthen_yers(term, "ь", "ɪ")
	term = strengthen_yers(term, "ъ", "ʊ")
	-- ... and render every remaining (weak) yer as an extra-short vowel
	term = rsub(term, "ь", "ɪ" .. BREVE)
	term = rsub(term, "ъ", "ʊ" .. BREVE)

	-- Iotate i, ɛ, ɛ̃, æ, and optionally ɑ at the start of a word and after a vowel
	local boundary = "%f[%a%-" .. DENTAL .. FRONTED .. "]"
	term = rsub(term, boundary .. "([iɛæ])", "j%1")
	-- Keep both vowels and put the glide between them (the previous
	-- replacement "j%1" discarded the second vowel).
	term = rsub(term, "(" .. vowel_class .. ")([iɛæ])", "%1j%2")
	term = rsub(term, "(" .. vowel_class .. diacritic_class .. ")([iɛæ])", "%1j%2")
	term = rsub(term, boundary .. "ɑ", "(j)ɑ")

	-- Remove unnecessary characters
	term = rsub(term, "%[", "")
	term = rsub(term, "%]", "")
	term = rsub(term, "%-", "")

	-- Convert pitch accent: grave gives low-then-high, double grave gives
	-- high-then-low
	term = convert_accent(term, GRAVE, LOW_TONE, HIGH_TONE)
	term = convert_accent(term, D_GRAVE, HIGH_TONE, LOW_TONE)

	return term
end

-- Template entry point: takes the term from the first template argument,
-- converts it with export.toIPA and returns the formatted IPA markup.
function export.show(frame)
	local title = mw.title.getCurrentTitle()

	-- Only the first positional parameter is recognised.
	local args = require("Module:parameters").process(frame:getParent().args, { [1] = {} })

	local term = args[1]
	if not term then
		-- No argument given: show a sample word on template pages,
		-- otherwise transcribe the title of the current page.
		if title.nsText == "Template" then
			term = "примѣръ"
		else
			term = title.text
		end
	end

	local bracketed = "[" .. export.toIPA(term) .. "]"

	local ipa_module = require("Module:IPA")
	local language = require("Module:languages").getByCode("cu")
	return ipa_module.format_IPA_full {
		lang = language,
		items = { { pron = bracketed } },
	}
end

return export