-- Module:urk-common
-- Appearance
-- - This module lacks a documentation subpage. Please create it.
-- - Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table of public members handed back to whoever requires this module.
local export = {}

-- Unicode-aware string helpers from the Scribunto ustring library, cached
-- as locals so that the rest of the module can call them directly.
local ustring = mw.ustring
local find, len, match, sub = ustring.find, ustring.len, ustring.match, ustring.sub

-- Pattern for a single syllable. Its six captures are, in order:
--   1. an optional preposed vowel sign (one of เ แ โ)
--   2. the initial consonant, optionally followed by ฺ
--   3. an optional medial ร or ล
--   4. an optional vowel sign followed by an optional ว
--   5. an optional ย or ะ
--   6. an optional closing consonant (or ํ)
export.syllable_pattern = "([เแโ]?)([กคงจชซฌญดตทนบปพฟมยรลวอฮ]ฺ?)([รล]?)([อาัิีึึืุู]?ว?)([ยะ]?)([กงจดนบวมลฮํ]?)"
--- Tokenise an entry into its syllables.
-- Characters outside the range ก-๎, as well as characters in that range that
-- cannot start a syllable according to export.syllable_pattern, are passed
-- through unchanged as single-character tokens.
-- @param entry            the string to split
-- @param perform_respell  when truthy, implicit vowels and finals are spelt
--                         out in the returned syllables
-- @return a sequence of strings (syllables and passed-through characters)
function export.syllabise(entry, perform_respell)
    local syllables = {}
    -- the entry never changes, so measure it once instead of on every pass
    local entry_len = len(entry)
    -- The pattern must match exactly at the current position; unanchored, it
    -- would silently skip ahead past characters it cannot parse.
    local pattern = export.syllable_pattern
    if pattern:sub(1, 1) ~= "^" then
        pattern = "^" .. pattern
    end
    local idx = 1
    while idx <= entry_len do
        local char = sub(entry, idx, idx)
        local v_pref, i, m, v_suf, f_pref, f_suf
        if char ~= " " and match(char, "[ก-๎]") then
            v_pref, i, m, v_suf, f_pref, f_suf = match(sub(entry, idx), pattern)
        end
        if not v_pref then
            -- leave non-thai characters (and thai characters that cannot
            -- begin a syllable) alone
            table.insert(syllables, char)
            idx = idx + 1
        else
            -- number of characters of the entry consumed by this syllable
            local match_length = len(v_pref .. i .. m .. v_suf .. f_pref .. f_suf)
            -- prevent initial consonant in the next syllable being
            -- misinterpreted as the final consonant in the current syllable
            local next_pos = idx + match_length
            if next_pos <= entry_len and find(sub(entry, next_pos, next_pos), "[อาัิีึึืุู]") then
                f_pref = ""
                f_suf = ""
                match_length = len(v_pref .. i .. m .. v_suf)
            end
            -- "ะ" can only have "ฮ" as its second segment
            if f_pref == "ะ" and f_suf ~= "ฮ" then
                f_suf = ""
                match_length = len(v_pref .. i .. m .. v_suf .. f_pref)
            end
            -- perform respellings
            -- (remember whether the final was added here rather than read
            -- from the entry, so that it is not mistaken for consumed text)
            local final_from_respell = false
            if perform_respell then
                -- syllables with non-approximant syllable-final have vowel "โ" by default
                if v_pref == "" and v_suf == "" and find(f_suf, "[กงดนบม]") then
                    v_pref = "โ"
                -- syllables with syllable-final "ะ" have vowel "ั" by default
                elseif v_pref == "" and v_suf == "" and f_pref == "ะ" and f_suf == "" then
                    v_suf = "ั"
                -- syllables with explicitly short vowel have syllable-final "ะ" by default
                elseif find(v_suf, "[ัิุ]") and f_pref == "" and f_suf == "" then
                    f_pref = "ะ"
                    final_from_respell = true
                -- syllables with "ว" and syllable-final actually have vowel "ัว"
                elseif v_suf == "ว" and f_pref ~= "" then
                    v_suf = "ัว"
                end
            end
            -- "ว" cannot be part of the vowel if "ั" does not precede it.
            -- A lone "ว" is left untouched: nothing is removed from the
            -- syllable, so the consumed length must not shrink either.
            if find(v_suf, "ว") and v_suf ~= "ัว" and v_suf ~= "ว" then
                v_suf = sub(v_suf, 1, 1)
                match_length = len(v_pref .. i .. m .. v_suf)
                -- Finals read from the entry sit after the ejected "ว" and
                -- will be parsed again with it; emitting them here as well
                -- would duplicate them in the output.
                if not final_from_respell then
                    f_pref = ""
                    f_suf = ""
                end
            end
            -- construct respelt syllable
            table.insert(syllables, v_pref .. i .. m .. v_suf .. f_pref .. f_suf)
            idx = idx + match_length
        end
    end
    return syllables
end
-- hand the module's public table back to the caller
return export