Module:za-pron
Appearance
- The following documentation is located at Module:za-pron/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local match = m_str_utils.match
local reverse = m_str_utils.reverse
local upper = m_str_utils.upper
local lang = require("Module:languages").getByCode("za")
-- FIXME: needs rewrite [3 February 2020 (UTC)]
-- FIXME: 老壯文 seems to omit tone marks from new Mandarin borrowings ([[w:zh:新壮文#注解]])
-- https://en.wikipedia.org/wiki/Standard_Zhuang
-- https://baike.baidu.com/item/壮语/7703463
-- 在线学壮文 https://web.archive.org/web/0/http://www.gxmyw.com.cn/plus/list.php?tid=21
-- 基础壮文学习系列:壮文标点符号与书写规则 https://web.archive.org/web/0/http://www.gxmyw.com.cn/wsxzw/2013/1017/57.html
-- Maps the romanized initial of a syllable (lowercase) to its IPA rendering.
-- export.convert looks initials up here for the 'IPA' scheme, and fix() uses
-- key membership to decide whether a consonant cluster is a valid initial.
local initialConv = {
['b'] = 'p',
['mb'] = 'ɓ',
['m'] = 'm',
['f'] = 'f',
['v'] = 'β',
['by'] = 'pʲ',
['my'] = 'mʲ',
['d'] = 't',
['nd'] = 'ɗ',
['n'] = 'n',
['l'] = 'l',
['s'] = 'θ',
['ny'] = 'ɲ',
['c'] = 'ɕ',
['y'] = 'j',
['g'] = 'k',
['ng'] = 'ŋ',
['r'] = 'ɣ',
['gy'] = 'kʲ',
['ngv'] = 'ŋʷ',
['gv'] = 'kʷ',
-- A syllable written without an initial is rendered with a glottal stop.
[''] = 'ʔ',
['h'] = 'h',
}
-- Pattern shape used for an initial when export.convert parses a syllable:
-- [bmfvdnslghrcy]?[gbd]?[vy]?
-- Maps the romanized vowel part of a syllable to IPA.
--   alone  = value used when the syllable has no final consonant
--   wfinal = value used when a final consonant follows
-- `false` means the table has no value for that position. Note that
-- export.convert falls back to `wfinal` when `alone` is false and the
-- syllable has no final.
local vowelConv = {
['a'] = { alone = 'a', wfinal = 'aː' },
['e'] = { alone = 'e', wfinal = 'eː' },
['i'] = { alone = 'i', wfinal = 'i' },
['o'] = { alone = 'o', wfinal = 'oː' },
['u'] = { alone = 'u', wfinal = 'u' },
['w'] = { alone = 'ɯ', wfinal = 'ɯ' },
['ai'] = { alone = 'aːi', wfinal = false },
['ei'] = { alone = 'ei', wfinal = false },
['oi'] = { alone = 'oːi', wfinal = false },
['ui'] = { alone = 'uːi', wfinal = false },
['wi'] = { alone = 'ɯːi', wfinal = false },
['ae'] = { alone = 'ai', wfinal = 'a' },
['ie'] = { alone = false, wfinal = 'iː' },
['oe'] = { alone = false, wfinal = 'o' },
['ue'] = { alone = false, wfinal = 'uː' },
['we'] = { alone = false, wfinal = 'ɯː' },
['au'] = { alone = 'aːu', wfinal = false },
['aeu'] = { alone = 'au', wfinal = false },
['eu'] = { alone = 'eːu', wfinal = false },
['iu'] = { alone = 'iu', wfinal = false },
['ou'] = { alone = 'ou', wfinal = false },
['aw'] = { alone = 'aɯ', wfinal = false },
}
-- Pattern shape of a vowel part: [aeiouw][ieu]?[uw]?
-- Vowel parts that may precede a final: [aeiouw]e?
-- Cannot be followed by a final: ai, ei, oi, ui, wi, au, aeu, eu, iu, ou, aw // [aeiouw]e?[iuw]
-- Cannot occur without a final: ie, oe, ue // [iou]e
-- Maps the romanized final consonant of a syllable to IPA ('IPA' scheme of
-- export.convert). The empty key covers syllables without a final; the
-- spellings b/d/g map to the same IPA stops as p/t/k.
local finalConv = {
[''] = '',
['m'] = 'm',
['n'] = 'n',
['ng'] = 'ŋ',
['p'] = 'p',
['b'] = 'p',
['t'] = 't',
['d'] = 't',
['k'] = 'k',
['g'] = 'k',
}
-- Pattern shape used for a final when export.convert parses a syllable:
-- [mnpbtdkg]?g?
-- Maps a tone number (as a string) to IPA tone letters. The trailing comments
-- give the numeric contour and, for tones 2-6, the tone letter used in the
-- romanization. The key '7:' is not a parsed tone: export.convert switches
-- tone 7 to '7:' when the IPA vowel contains the length mark 'ː'.
local toneConv = {
['1'] = '˨˦', --24
['2'] = '˧˩', --31 z
['3'] = '˥', --55 j
['4'] = '˦˨', --42 x
['5'] = '˧˥', --35 q
['6'] = '˧', --33 h
['7'] = '˥', --55
['7:'] = '˧˥', --35
['8'] = '˧', --33
}
-- Maps a romanized tone letter to its tone number. fix() uses this table as a
-- gsub replacement for the letters z, j, x and q only; 'h' is handled
-- separately there because it can also be a consonant.
local toneConvToNumbers = {
[''] = '1',
['z'] = '2',
['j'] = '3',
['x'] = '4',
['q'] = '5',
['h'] = '6',
}
-- Inverse mapping: tone number back to the romanized tone letter, used by the
-- 'hyphenation' scheme of export.convert. Tones 1, 7, 7: and 8 have no letter.
local toneConvFromNumbers = {
['1'] = '',
['2'] = 'z',
['3'] = 'j',
['4'] = 'x',
['5'] = 'q',
['6'] = 'h',
['7'] = '',
['7:'] = '',
['8'] = '',
}
-- Tables for the 'old' scheme of export.convert (presumably the 1957
-- orthography, going by the table names). Spellings not listed are kept as-is.

-- Consonant replacements; export.convert applies this table to both the
-- initial and the final of a syllable.
local consonantConv_1957 = {
['mb'] = 'ƃ',
['nd'] = 'ƌ',
['ng'] = 'ŋ',
['ngv'] = 'ŋv',
}
-- Vowel replacements, used as a gsub replacement table on the vowel part.
local vowelConv_1957 = {
['oe'] = 'ɵ',
['ae'] = 'ə',
['w'] = 'ɯ',
}
-- Tone number to old-orthography tone letter; tones 1, 7, 7: and 8 are unmarked.
local toneConv_1957 = {
['1'] = '',
['2'] = 'ƨ',
['3'] = 'з',
['4'] = 'ч',
['5'] = 'ƽ',
['6'] = 'ƅ',
['7'] = '',
['7:'] = '',
['8'] = '',
}
-- Normalizes romanized text so that every syllable carries an explicit tone
-- digit. Each run of ASCII letters is split into syllables (using '|' as a
-- temporary boundary marker), tone letters are replaced by digits, syllables
-- without a tone mark get 1, 7 or 8, and the markers are removed again.
-- Leading apostrophes and trailing non-letter characters of each run are
-- passed through unchanged. Returns the rewritten text as one string.
-- Intermediate states are written to the debug log via mw.log.
local function fix(text)
local output = {}
-- Each chunk is: optional apostrophe, a run of letters, then any non-letters.
for word in gmatch(text, '\'?[A-Za-z]+[^A-Za-z]*') do
-- Split the chunk into its three parts (the inner `word` shadows the loop variable).
local apostrophe, word, nonword = match(word, '(\'?)([A-Za-z]+)([^A-Za-z]*)')
word = gsub(word, '[zjxq]', toneConvToNumbers) -- excludes h which is ambiguously tone or consonant
-- /CV-CV/...=<CVCV>...
-- /CVC-V/...=<CVC'V>...
-- Pattern quantifiers are greedy from the beginning of the string,
-- so counteract this by reversing the string:
-- the pattern below is written for the reversed word (final, vowel, initial)
-- and therefore first matches what was originally the last syllable.
word = reverse(word)
-- Append '|' after every reversed syllable and prepend one; after reversing
-- back, each syllable is preceded by '|' and the word ends with '|'.
word = '|' .. gsub(word, '(g?[mnpbtdkg]?)([ieu]?[uw]?[aeiouwAEIUOUW]+)([vy]?[gbd]?[bmfvdnslghrcyBMFVDNSLGHRCY]?)', '%1%2%3|')
-- "+" seems to be needed after "[aiueow]"
-- correct: "daeuz"→"daeuz" wrong: "daeuz"→"da|euz"
word = reverse(word)
mw.log('za1>' .. word)
-- fix bad initial consonant: "|hya"→"h|ya", "|ngya"→"n|gya"
-- If the 2-3 characters after a boundary are not a key of initialConv,
-- move the boundary one character to the right. Returning nil from the
-- callback leaves the match unchanged.
word = gsub(word, '(|)([^aiueow])([^aiueow])([^aiueow]?)([aiueow])', function(x,a,b,c,d)
if not initialConv[lower(a..b..c)] then
return a..x..b..c..d
end
end)
-- Re-examine a vowel run that is followed by a final consonant and a boundary.
word = gsub(word, '([aiueow]+)([mnpbtdkg]g?)(|)', function(v,c,x)
-- if there is a final consonant,
-- (NOTE(review): the pattern requires at least one consonant, so c is never '')
if c ~= '' then
-- and vowel sequence is not a sequence that only appears before finals,
if not match(v, '^[aeiouw]e?$') then
-- detect valid ...VC sequence at end of string
-- (again done on the reversed text; inserts an extra '|' inside the vowel run)
return reverse(gsub(reverse(v..c..x), '(|)([^aiueow]+)(e?[aeiouw])', '%1%2%3|'))
end
end
end)
-- Special case: move the boundary in "|gvu" so that the g ends the previous syllable.
word = gsub(word, '|gvu', 'g|vu')
mw.log('za2>' .. word)
-- An "h" directly before a boundary is read as the tone-6 letter.
word = gsub(word, 'h|', '6|')
-- A letter run that reaches a boundary without a tone digit gets a default
-- tone: 7 after p/t/k, 8 after b/d/g (but not "ng"), otherwise 1.
word = gsub(word, '([A-Za-z]+)|', function(a)
if match(a, '[ptk]$') then
return a..'7|'
elseif match(a, '[bdg]$') and not match(a, 'ng$') then
return a..'8|'
else
return a..'1|'
end
end)
mw.log('za3>' .. word)
-- Drop the boundary markers and put the chunk back together.
table.insert(output, apostrophe .. gsub(word, '|', '') .. nonword)
end
return table.concat(output)
end
-- Raises a descriptive error for a syllable that cannot be converted.
-- Level 2 attributes the error to the caller inside export.convert.
local function bad_syllable(syllable, problem)
	error('Cannot convert Zhuang syllable "' .. syllable .. '": ' .. problem, 2)
end

--- Converts romanized Standard Zhuang text into another representation.
-- May be called from Lua, or with a frame-like table whose `args` hold
-- [1] = text, [2] = scheme and `new_bor`.
-- @param text     romanized text (or a frame-like table, see above)
-- @param scheme   one of:
--   'IPA'           string "/.../", syllables separated by spaces
--   'old'           string using the *_1957 conversion tables
--   'hyphenation'   string with syllables joined by '‧', spaces and apostrophes removed
--   'tone_numbers'  string with the tone number in <sup> tags after each syllable
--   'raw_syllables' table of the normalized syllable strings
-- @param new_bor  when truthy, tone 1 is rendered as tone 5 in the 'IPA' and
--                 'tone_numbers' schemes (presumably for new borrowings)
-- @return a string, or a table for 'raw_syllables'. If the text contains no
--         syllable at all, an unrecognized scheme is not detected and the
--         empty table is returned.
-- Raises an error for an unrecognized scheme or a syllable that cannot be
-- parsed or looked up.
function export.convert(text, scheme, new_bor)
	if type(text) == "table" then
		text, scheme, new_bor = text.args[1], text.args[2], text.args['new_bor']
	end
	local converted = {}
	-- Non-letter characters before the first word are lost by the syllable
	-- loop below, so remember them for the schemes that reproduce the text.
	local extra_pre = match(text, '^[^A-Za-z]*')
	text = fix(text)
	mw.log('za4>' .. text)
	for syllable in gmatch(text, '[A-Za-z]+%d[^A-Za-z]*') do
		local initial, vowel, final, tone, extra = match(syllable, '^([BMFVDNSLGHRCYbmfvdnslghrcy]?[gbd]?[vy]?)([AEIOUWaeiouw][ieu]?[uw]?)([mnpbtdkg]?g?)(%d)([^A-Za-z]*)$')
		-- A letter run that is not a well-formed syllable yields no captures;
		-- report it instead of failing on a nil concatenation below.
		if not initial then
			bad_syllable(syllable, 'it does not have the form initial + vowel + final + tone')
		end
		local caps = false
		mw.log('za5>' .. initial, vowel, final, tone, extra)
		-- Lookups are done in lowercase; capitalization is restored afterwards
		-- by the schemes that keep the spelling.
		if find(initial .. vowel .. final, '[A-Z]') then
			caps = true
			initial, vowel, final = lower(initial), lower(vowel), lower(final)
		end
		if scheme == 'IPA' then
			local ipa_initial = initialConv[initial]
			if not ipa_initial then
				bad_syllable(syllable, 'unknown initial "' .. initial .. '"')
			end
			local vowel_forms = vowelConv[vowel]
			if not vowel_forms then
				bad_syllable(syllable, 'unknown vowel "' .. vowel .. '"')
			end
			local ipa_vowel
			if final == '' then
				-- Vowels listed only with a pre-final form fall back to it.
				ipa_vowel = vowel_forms.alone or vowel_forms.wfinal
			else
				ipa_vowel = vowel_forms.wfinal
			end
			if not ipa_vowel then
				bad_syllable(syllable, 'vowel "' .. vowel .. '" cannot be followed by final "' .. final .. '"')
			end
			local ipa_final = finalConv[final]
			if not ipa_final then
				bad_syllable(syllable, 'unknown final "' .. final .. '"')
			end
			-- Tone 7 has a separate contour when the vowel is long.
			if tone == '7' and find(ipa_vowel, 'ː') then
				tone = '7:'
			elseif new_bor and tone == '1' then
				tone = '5'
			end
			tone = toneConv[tone]
			syllable = ipa_initial .. ipa_vowel .. ipa_final .. tone
			table.insert(converted, syllable)
		elseif scheme == 'old' then
			initial = consonantConv_1957[initial] or initial
			vowel = gsub(vowel, '[oa]e', vowelConv_1957)
			vowel = gsub(vowel, 'w', vowelConv_1957)
			final = consonantConv_1957[final] or final
			tone = toneConv_1957[tone]
			-- Open-syllable "ae" and "aw" get their own spellings.
			if vowel == 'ə' and final == '' then
				vowel = 'əi'
			elseif vowel == 'aɯ' and final == '' then
				vowel = 'əɯ'
			end
			syllable = initial .. vowel .. final .. tone .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end
			table.insert(converted, syllable)
		elseif scheme == 'hyphenation' then
			tone = toneConvFromNumbers[tone]
			extra = gsub(extra, '\'', '')
			syllable = initial .. vowel .. final .. tone .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end
			table.insert(converted, syllable)
		elseif scheme == 'tone_numbers' then
			if new_bor and tone == '1' then
				tone = '5'
			end
			extra = gsub(extra, '\'', '')
			syllable = initial .. vowel .. final .. '<sup>' .. tone .. '</sup>' .. extra
			if caps then syllable = gsub(syllable, '^(.)', upper) end
			table.insert(converted, syllable)
		elseif scheme == 'raw_syllables' then
			table.insert(converted, syllable)
		else
			error('Convert to what representation?')
		end
	end
	if scheme == 'IPA' then
		converted = '/' .. table.concat(converted, ' ') .. '/'
	elseif scheme == 'old' then
		converted = extra_pre .. table.concat(converted, '')
		-- Drop an apostrophe that directly follows a tone-6 mark.
		-- NOTE(review): applied twice, presumably so that a second consecutive
		-- apostrophe is removed as well.
		converted = mw.ustring.gsub(mw.ustring.gsub(converted, "([6Ƅƅ])'", "%1"), "([6Ƅƅ])'", "%1")
	elseif scheme == 'hyphenation' then
		converted = gsub(extra_pre .. table.concat(converted, '‧'), ' ', '')
	elseif scheme == 'tone_numbers' then
		converted = extra_pre .. table.concat(converted, '')
	elseif scheme == 'raw_syllables' then
		-- (pass) the table of syllables is returned as-is
	end
	return converted
end
--- Template entry point: renders the pronunciation section lines.
-- Reads parameter 1 (the romanized text, defaulting to the current page
-- title) and the boolean `new_bor` from the parent frame, and returns the
-- IPA line, the tone-number line and the hyphenation line (with the
-- syllable-count category), joined by "\n* ".
function export.show(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = { },
		['new_bor'] = { type = "boolean" },
	})
	local new_bor = args['new_bor']
	local text = args[1] or mw.title.getCurrentTitle().text

	-- Each piece is computed in the order in which it appears in the output.
	local qualifier = require("Module:accent qualifier").format_qualifiers(lang, {"Standard Zhuang"})
	local ipa = require("Module:IPA").format_IPA_full {
		lang = lang,
		items = {{ pron = export.convert(text, "IPA", new_bor) }}
	}
	local tone_numbers = export.convert(text, 'tone_numbers', new_bor)
	local hyphenation = export.convert(text, 'hyphenation', new_bor)
	-- The number of raw syllables selects the "N-syllable words" category.
	local syllable_count = #export.convert(text, 'raw_syllables')

	local lines = {
		qualifier .. " " .. ipa,
		'Tone numbers: ' .. tone_numbers,
		'Hyphenation: ' .. hyphenation .. '[[Category:Zhuang ' .. syllable_count .. '-syllable words]]',
	}
	return table.concat(lines, '\n* ')
end
--- Tells whether the text in frame.args[1] is written with plain Latin letters.
-- @param frame  frame whose first argument is the text to test
-- @return 'y' if the text contains an ASCII letter and none of the special
--         letters listed in the first pattern; '' otherwise (including when
--         the argument is missing)
function export.is_latin(frame)
	local text = frame.args[1]
	-- A missing argument would otherwise be handed to find() as nil.
	if not text then
		return ''
	end
	if find(text, '[ƂƃƋƌŊŋƏəƟɵƜɯƧƨЗзЧчƼƽƄƅ]') then
		return ''
	elseif find(text, '[A-Za-z]') then
		return 'y'
	end
	return '' -- CJK is too much of a pain to detect
end
return export