Module:User:AmazingJus/mn
Appearance
- The following documentation is located at Module:User:AmazingJus/mn/documentation. [edit] Categories were auto-generated by Module:documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
mn
[edit]- 22 of 39 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
![]() | авто́бус (avtóbus) | /afˈtʰɔpʊs/ | /awˈtʰps/ |
![]() | ай (aj) | /ˈai̯/ | /ˈai̯/ |
![]() | аргалаа (argalaa) | /arɢəˈɮa/ | /arɢˈɮa/ |
![]() | ая (aja) | /ˈaj/ | /ajˈa/ |
![]() | хонх (xonx) | /ˈxɔŋx/ | /ˈxɔnx/ |
![]() | баг (bag) | /ˈpaɡ/ | /ˈpaɡ/ |
![]() | бага (baga) | /ˈpaɢ/ | /ˈpaɢ/ |
![]() | борви (borvi) | /ˈpɔrʲəwʲ/ | /ˈpɔrʲwʲ/ |
![]() | Будда~ (Budda~) | /pʊtˈta/ | /pʊtˈta/ |
![]() | Будда́ (Buddá) | /pʊtˈta/ | /pʊtˈt/ |
![]() | галуу (galuu) | /ɢaˈɮʊ/ | /ɢaˈɮʊ/ |
![]() | ежен (ježen) | /ˈjet͡ɕəŋ/ | /ˈjet͡ɕŋ/ |
![]() | коммерса́нт (kommersánt) | /kʰɔmirˈsantʰ/ | /kʰɔmmrˈsntʰ/ |
![]() | лхагва (lxagva) | /ˈɬaɢʷ/ | /ˈɬaɢw/ |
![]() | мэргэжилтэн (mergežilten) | /ˈmerəɢt͡ɕəɮtʰəŋ/ | /ˈmerɢt͡ɕɮtʰŋ/ |
![]() | монгол хэл (mongol xel) | /ˈmɔnɢəɮ xeɮ/ | /ˈmɔnɢɮ ˈxeɮ/ |
![]() | нутаг (nutag) | /ˈnʊtʰəɡ/ | /ˈnʊtʰɡ/ |
![]() | оньс (onʹs) | /ˈɔnʲs/ | /ˈɔnʲs/ |
![]() | сайн (sajn) | /ˈsai̯ŋ/ | /ˈsai̯ŋ/ |
![]() | сайн байна уу (sajn bajna uu) | /sai̯ŋ pai̯n ˈʊː/ | /ˈsai̯ŋ ˈpai̯n ˈʊː/ |
![]() | салхи (salxi) | /ˈsaɮʲxʲ/ | /ˈsaɮʲxʲ/ |
![]() | сармагчин (sarmagčin) | /ˈsarməɡt͡ɕʰəŋ/ | /ˈsarmɢt͡ɕʰŋ/ |
![]() | тагт (tagt) | /ˈtʰaɢtʰ/ | /ˈtʰaɢtʰ/ |
![]() | таг*т (tag*t) | /ˈtʰaɡtʰ/ | /ˈtʰaɡtʰ/ |
![]() | такси́ (taksí) | /tʰakʰˈsʲi/ | /tʰakʰˈs/ |
![]() | тийм (tiim) | /ˈtʲʰiːm/ | /ˈtʲʰiːm/ |
![]() | хаалга (xaalga) | /ˈxaːɮəɢ/ | /ˈxaːɮɢ/ |
![]() | хиам (xiam) | /ˈxʲaːm/ | /ˈxʲaːm/ |
![]() | хонх (xonx) | /ˈxɔŋx/ | /ˈxɔnx/ |
![]() | цонх (conx) | /ˈt͡sʰɔŋx/ | /ˈt͡sʰɔnx/ |
![]() | шуу (šuu) | /ˈʃʊː/ | /ˈʃʊː/ |
![]() | ууртайгаар (uurtajgaar) | /ʊːrˈtʰai̯ɢar/ | /ʊːrˈtʰai̯ɢar/ |
![]() | уйтгартай (ujtgartaj) | /ˈʊi̯tʰɢərtʰai̯/ | /ʊi̯tʰɢrˈtʰai̯/ |
![]() | юул (juul) | /ˈjʊːɮ/ | /ˈjʊːɮ/ |
![]() | энэ (ene) | /ˈin/ | /ˈin/ |
![]() | эрдэнэ (erdene) | /ˈirtən/ | /ˈirtn/ |
![]() | Эрдэнэт (Erdenet) | /ˈirtəntʰ/ | /ˈirtntʰ/ |
![]() | Өргөдөл (Örgödöl) | /ˈorəɢtəɮ/ | /ˈorɢtɮ/ |
![]() | хөшөөт (xöšööt) | /xoˈʃotʰ/ | /xoˈʃotʰ/ |
local export = {}
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsubn = mw.ustring.gsub
local lc = mw.ustring.lower
local match = mw.ustring.match
local strip = mw.text.trim
local sub = mw.ustring.sub
local u = require("Module:string/char")
local lang = require("Module:languages").getByCode("mn")
local sc = require("Module:scripts").getByCode("Cyrl")
local mn = require("Module:mn-common")
local final_clusters = require("Module:mn/data").syll_final_cons
-- Tag `text` as Mongolian in Cyrillic script by delegating to
-- Module:script utilities; `face` is passed through unchanged.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
-- Build a link to `term` as a Mongolian (Cyrillic) entry by delegating to
-- full_link() of Module:links; `face` is passed through unchanged.
function export.link(term, face)
	local data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(data, face)
end
-- Same as gsubn(), but only the resulting string is returned; the
-- substitution count is dropped by the surrounding parentheses.
local function gsub(term, foo, bar, n)
	return (gsubn(term, foo, bar, n))
end
--[[
Primarily sourced from The Phonology of Mongolian by Jan-Olof Svantesson (2005)
]]--
--[[
Character sets used to build pattern classes.
]]--
local chars = {}
chars.c = "бвгджзклмнпрстфхцчшщ" -- Cyrillic consonant letters
chars.v = "аеёиоуэюяөү" -- Cyrillic vowel letters that can be reduced (everything in chars.y except ы)
chars.u = "aeiɔoʊu" -- Full vowels, written in IPA
chars.y = "аеёиыоуэюяөү" -- All Cyrillic vowel letters
--[[
Lookup from a combining stress accent in the input to the IPA stress mark.
]]--
local stress = {}
stress[u(0x0301)] = u(0x02C8) -- Acute accent -> primary stress (ˈ)
stress[u(0x0300)] = u(0x02CC) -- Grave accent -> secondary stress (ˌ)
--[[
Other IPA symbols used by the transcription rules.
]]--
-- long: length mark (ː); diphthong: non-syllabic mark (̯)
local long, diphthong = u(0x02D0), u(0x032F)
-- primary: primary stress mark (ˈ); secondary: secondary stress mark (ˌ)
local primary, secondary = u(0x02C8), u(0x02CC)
--[[
Lookup tables from letters (or letter sequences) to their representations.
--]]
local mapping = {
	-- Cyrillic consonant letters to IPA. A trailing asterisk selects the
	-- alternative value of г and н.
	cons = {
		["б"] = "p", ["в"] = "w", ["г"] = "ɢ", ["г*"] = "ɡ",
		["д"] = "t", ["ж"] = "t͡ɕ", ["з"] = "t͡s", ["й"] = "i̯",
		["к"] = "kʰ", ["л"] = "ɮ", ["м"] = "m", ["н"] = "n",
		["н*"] = "ŋ", ["п"] = "pʰ", ["р"] = "r", ["с"] = "s",
		["т"] = "tʰ", ["ф"] = "f", ["х"] = "x", ["ц"] = "t͡sʰ",
		["ч"] = "t͡ɕʰ", ["ш"] = "ʃ", ["щ"] = "ʃt͡ɕ",
	},
	-- Cyrillic vowel letters and the hard/soft signs to IPA.
	vowels = {
		["а"] = "a", ["е"] = "je", ["ё"] = "jɔ", ["и"] = "i",
		["о"] = "ɔ", ["у"] = "ʊ", ["э"] = "e", ["ю"] = "jʊ",
		["я"] = "ja", ["ө"] = "o", ["ү"] = "u", ["ы"] = "i",
		["ъ"] = "", ["ь"] = "ʲ"
	},
	-- Orthographic double-vowel sequences (a sequence, order preserved).
	double = {
		"аа", "ее", "еи", "еө", "ёо", "ий", "оо",
		"уу", "ээ", "юу", "юү", "яа", "өө", "үү"
	},
	-- Consonant allophones, keyed by the IPA consonant they replace.
	alloph = {
		["w"] = "w̜", ["ɡʲ"] = "ɟ", ["xʲ"] = "ç", ["x"] = "χ"
	},
	-- Phonetic values of diphthongs.
	diph = {
		["ai"] = "æe", ["ei"] = "e", ["oi"] = "ɞe", ["ui"] = "ɵe", ["üi"] = "ue"
	}
}
--[[
Look up the position of a vowel.
Asks mn.vowelharmony() about the vowel and returns the `position` field of
the first entry of its result, or nil when the result has no first entry.
(NOTE(review): the exact shape of the vowelharmony() result is defined in
Module:mn-common and should be confirmed there.)
]]--
local function get_position(vowel)
	local first = mn.vowelharmony(vowel)[1]
	if not first then
		return nil
	end
	return first.position
end
--[[
Check whether the two-part cluster a .. b is valid.
A cluster whose second part is "j" is always valid. Otherwise every entry of
every group in final_clusters is tried, and the first hit makes the cluster valid.
NOTE(review): the call is match(entry, a .. b), i.e. the data entry is the
subject and the cluster is used as the pattern. If the entries in
Module:mn/data are themselves patterns, the arguments are probably the wrong
way round -- confirm against the data module before relying on this function.
]]--
local function is_valid_cluster(a, b)
	local valid = (b == "j")
	if not valid then
		for _, group in ipairs(final_clusters) do
			for _, candidate in ipairs(group) do
				if match(candidate, a .. b) then
					return true
				end
			end
		end
	end
	return valid
end
--[[
Respell the vowels of one word (delimited by # on both sides) according to
syllable position, before the letters are converted to IPA.

In the initial syllable, single vowels are left alone and double vowels are
rewritten as vowel + length mark. In the rest of the word, single vowels are
deleted (they are reduced; putting them back is the job of add_reduced) and
double vowels lose their length mark, i.e. they become short full vowels.
A single vowel is NOT deleted when it is followed by й (diphthong), by the
length mark, or by an explicit acute/grave stress accent: a vowel the editor
marked as stressed must survive so that the stress mark has something to
attach to.

Returns the respelled word.
--]]
local function respell_vowels(word)
	local cons = "[" .. chars.c .. "]"
	local vowel = "[" .. chars.v .. "]"
	local accents = u(0x0301) .. u(0x0300) -- Combining acute and grave accents.

	-- Handle substitutions to palatalise consonants.
	-- иа, ио, иу are long monophthongs which palatalise the preceding consonant.
	word = gsub(word, "(" .. cons .. ")и([аоу])", "%1j%2%2")
	-- Every remaining и gets a preceding j as a palatalisation marker.
	-- (FIXME: need to consider when converting chars.v into Cyrillic)
	word = gsub(word, "и(й?)", "jи%1")
	-- The j marker turns into ʲ after the listed consonants and is dropped after the others.
	word = gsub(word, "(" .. cons .. ")j", function(c)
		return match(c, "[бвгдлмнпртх]") and c .. "ʲ" or c
	end)

	-- Handle vowel respellings.
	-- Word-initial э merges with и. This runs after the step above on purpose,
	-- so the resulting и does not receive a j marker.
	word = gsub(word, "#э", "#и")
	-- A tilde after a vowel marks it as long, which keeps it from being deleted below.
	word = gsub(word, "(" .. vowel .. ")~", "%1" .. long)
	-- Double vowels become first letter + length mark. mapping.double is a
	-- sequence, so ipairs() is used to apply the rules in their listed order.
	for _, double in ipairs(mapping.double) do
		word = gsub(word, double, sub(double, 1, 1) .. long)
	end
	-- Mark word-final г and н with an asterisk so that handle_consonants picks
	-- their alternative values (important for distinguishing between uvular and
	-- alveolar phonemes marked by silent vowels).
	word = gsub(word, "([гн])#", "%1*#")

	-- Split the word into the initial syllable (up to and including its first
	-- run of vowel letters) and the rest, and reduce only the rest.
	return gsub(word, "(#[^" .. chars.y .. "]*[" .. chars.y .. "]+)(.*#)", function(i, non_i)
		-- Remove single vowels, except before a length mark, й or a stress accent.
		non_i = gsub(non_i, vowel .. "([^" .. long .. "й" .. accents .. "])", "%1")
		-- Double vowels are actually short in non-initial syllables.
		non_i = gsub(non_i, "(" .. vowel .. ")" .. long, "%1")
		return i .. non_i
	end)
end
--[[
Convert the consonant letters of a word to IPA.
Expects a word whose vowels have already been converted; returns the word
with every consonant replaced through mapping.cons.
--]]
local function handle_consonants(word)
	local cons_class = "[" .. chars.c .. "]"

	--[[
	Disabled rules, kept for reference. Note that chars.b is not defined in the
	chars table of this module.
	word = gsub(word, "([" .. chars.b .. "]?)г([^* ])", function(b, c) -- Handle further substitutions for uvular consonants.
		if b ~= "" or match(chars.b, c) then
			return b .. "г*" .. c -- г is uvular when in contact with back vowels and non-final.
		end
	end)
	word = gsub(word, "н([" .. chars.v .. "ыгшх])", "н*%1") -- н is uvular preceding a vowel or г, ш, х
	--]]

	-- Word-initial лх (optionally after the primary stress mark) is a voiceless
	-- alveolar lateral fricative (found initially in some Tibetan loanwords).
	local result = gsub(word, "#(" .. primary .. "?)лх", "#%1ɬ")

	-- Palatalisation spreads leftwards: in a run of consonants (and ə) that ends
	-- in ʲ, every consonant of the run gets its own ʲ.
	local function palatalise_run(run)
		return gsub(run, "(" .. cons_class .. ")", "%1ʲ")
	end
	result = gsub(result, "([" .. chars.c .. "ə]+)ʲ", palatalise_run)

	-- Replace each consonant (with its optional asterisk) by its IPA value;
	-- anything without an entry in mapping.cons is left as it is.
	result = gsub(result, ".%*?", mapping.cons)

	-- The palatalisation mark goes before the aspiration mark.
	return gsub(result, "ʰʲ", "ʲʰ")
end
--[[
Add the primary stress mark to a word. The word is expected to have its
vowels already in IPA while its consonants are still Cyrillic.

If the word contains an acute accent, the accents are converted through the
stress table and no stress is computed. Otherwise the rule follows
https://roa.rutgers.edu/files/172-0197/172-0197-WALKER-0-1.PDF:
the rightmost non-final heavy syllable is stressed; if only the final syllable
is heavy, that syllable is stressed; otherwise the first syllable is stressed.
After respell_vowels, the only full vowels left outside the initial syllable
are those of heavy syllables, so counting them counts the heavy syllables.

Returns the word with the stress mark placed before the onset consonant of the
stressed syllable.
--]]
local function add_stress(word)
	local full = "[" .. chars.u .. "]"
	if match(word, u(0x0301)) then
		-- No need to determine stress if a primary stress mark is present.
		word = gsub(word, ".", stress)
	else
		-- Split into the initial syllable (through its vowel and an optional й
		-- or length mark) and the rest of the word.
		word = gsub(word, "(#[^" .. chars.u .. "]*" .. full .. "+[й" .. long .. "]?)(.*#)", function(i, non_i)
			-- Count the full vowels after the initial syllable.
			local _, non_i_count = gsubn(non_i, full, "")
			-- The initial syllable is heavy when it contains a length mark or й.
			-- The subject goes first and the pattern second; with the arguments
			-- swapped this test could never succeed.
			local heavy_initial = match(i, "[" .. long .. "й]") ~= nil
			if non_i_count == 0 or (non_i_count == 1 and heavy_initial) then
				-- Either there is no other heavy syllable, or the only other one
				-- is final and the initial syllable is heavy: stress the first syllable.
				i = gsub(i, "#", "#" .. primary)
				return i .. non_i
			else
				-- Put the stress mark before the last full vowel ...
				non_i = gsub(non_i, "(.*)(" .. full .. ")", "%1" .. primary .. "%2")
				-- ... then, if an earlier full vowel exists in this part, move the
				-- mark in front of the nearest one (rightmost non-final heavy syllable).
				non_i = gsub(non_i, "(.*)(" .. full .. ".*)" .. primary, "%1" .. primary .. "%2")
				return i .. non_i
			end
		end)
	end
	-- Shift the stress mark to before the consonant that starts the stressed syllable.
	return gsub(word, "([" .. chars.c .. "][^" .. chars.c .. "]*)" .. primary, primary .. "%1")
end
--[[
Placeholder for re-inserting reduced vowels where consonants do not form a
valid cluster. The intended behaviour is to look at consonant sequences that
are two or more characters long, check whether they form a valid cluster and,
if not, add a schwa after the first consonant.
NOTE: this is not implemented yet -- the function currently returns the word
unchanged, so no schwa is ever added.
--]]
local function add_reduced(word)
return word
end
--[[
Transcribe a term phonemically into IPA.
The term must already have every word wrapped in # markers; each word is
transcribed on its own and put back in place.
]]
local function pron_m(term)
	-- Run one #-delimited word through the pipeline. The order matters:
	-- stress is computed on IPA vowels while the consonants are still Cyrillic.
	local function transcribe_word(word)
		local ipa = respell_vowels(word) -- Respell vowels by syllable position.
		ipa = add_reduced(ipa) -- Add reduced vowels.
		ipa = gsub(ipa, ".", mapping.vowels) -- Vowel letters to IPA.
		ipa = add_stress(ipa) -- Add stress marks.
		return handle_consonants(ipa) -- Consonant letters to IPA.
	end

	return gsub(term, "(#[^#]*#)", transcribe_word)
end
--[[
Entry point of the module.
Accepts either a string or a table with an `args` field (in which case the
first argument is used) and returns the phonemic IPA transcription.
--]]
function export.toIPA(term)
	local input = term
	if type(input) == "table" then
		input = input.args[1] -- Called with a table: take its first argument.
	end

	-- Lowercase the text and wrap every space-separated word in # markers.
	local marked = gsub(lc(input), "([^ ]+)", "#%1#")
	-- Transcribe, then strip the markers again.
	return gsub(pron_m(marked), "#", "")
end
return export