Jump to content

Module:fa-IPA/harakat-ira

From Wiktionary, the free dictionary

This module can create phonetic, fully vocalized Persian text from an Iranian romanization. It is meant to be used by {{fa-IPA}} to create phonetic spellings for Iranian Persian using modern Iranian vocalization, but if needed it can also be invoked from elsewhere with {{xlit}}.

For the equivalent module for Classical Persian and Dari, see {{Module:fa-IPA/harakat}}.


-- Table returned by this module; public functions are attached to it below.
local export = {}

-- Unicode-aware helpers from the Scribunto ustring library.
local U = mw.ustring.char
local rsubn = mw.ustring.gsub

-- Short-vowel marks (combining characters).
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F) -- fatha, kasra, damma

-- Other combining marks.
local tashdid = U(0x651) -- also called shadda
local jazm = U(0x652) -- sukoon
local highhmz = U(0x654) -- hamza above

-- Letters and invisible joiners.
local he = "ه"
local zwnj = U(0x200C) -- zero-width non-joiner


-- Lookup table: romanized consonant -> Persian letter.
-- STOP! Incorrect characters are supposed to be removed by fa-IPA; if a wrong
-- character shows up in the output, look for the cause in fa-IPA, not here.
local convert_consonants = {
	b = "ب",
	["č"] = "چ",
	d = "د",
	f = "ف",
	g = "گ",
	["ğ"] = "غ",
	h = he,
	j = "ج",
	k = "ک",
	l = "ل",
	m = "م",
	n = "ن",
	p = "پ",
	q = "ق",
	r = "ر",
	s = "س",
	["š"] = "ش",
	t = "ت",
	["ɖ"] = "د", -- only for Hazaragi
	["ʈ"] = "ت", -- only for Hazaragi
	w = "و",
	v = "و",
	x = "خ",
	y = "ی",
	z = "ز",
	["ž"] = "ژ",
	["'"] = "ئ",
}

-- Lookup table: romanized vowel -> Persian script.
-- a/e/o become combining vowel marks; â/u/i become full letters.
local convert_vowels = {
	a = zabar,
	["â"] = "ا",
	e = zer,
	o = pesh,
	u = "و",
	i = "ی",
}

-- Character sets spliced into "[...]" pattern classes by export.tr.

-- Romanized vowels; â is deliberately left out because including it causes issues.
local vowels = "aeoiu"
-- Romanized consonants (the apostrophe is part of the set).
local consonants = "bptjčxdrzžsš'ğfqkglmnwvwhy"
-- Characters after which export.tr turns a hyphen into a sukoon instead of a
-- zero-width non-joiner (presumably the letters that do not join to the
-- following letter -- confirm). The sukoon mark itself is part of the set.
local dc_consonants = "âdrwvuzž" .. jazm

--- Turn an Iranian Persian romanization into vocalized Persian-script text.
-- The input is pushed through a fixed, order-sensitive chain of substitutions:
-- word boundaries are marked with "#", special sequences are rewritten, the
-- remaining Latin letters are mapped through convert_consonants and
-- convert_vowels, and finally the helper markers are stripped again.
-- @param text  romanized string to convert
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the converted string
function export.tr(text, lang, sc)
	-- Run a single substitution on the working string.
	local function sub(pattern, replacement)
		text = rsubn(text, pattern, replacement)
	end

	-- Reusable capture groups for the patterns below.
	local cons = "([" .. consonants .. "])"
	local vow = "([" .. vowels .. "])"
	local dc = "([" .. dc_consonants .. "])"
	local non_dc = "([^" .. dc_consonants .. "])"
	local hyphen = "([-])"

	-- Drop parentheses, then mark the edges of every word with "#".
	sub("([%(%)])", "")
	sub(" | ", "# | #")
	sub(" ", "# #")
	text = "##" .. text .. "##"
	sub("`", "")
	-- NOTE(review): every space was already rewritten to "# #" above, so this
	-- pattern probably can no longer match -- confirm before touching it.
	sub(", ", ",")
	sub(",", "] ,[")
	sub("%]", "#]#")
	sub("%[", "#[#")

	-- remove unpronounced or incorrect letters
	sub("[.]", "")
	-- A word-final h after a/e/o receives a sukoon.
	sub("([aeo]h)#", "%1" .. jazm .. "#")

	-- Shield ezafe endings with "_" so the later steps leave them alone.
	sub(cons .. hyphen .. "e#", "%1_e_")
	sub("([âu])" .. hyphen .. "ye#", "%1_ye_#")
	sub("([i])" .. hyphen .. "ye#", "%1yye_#")
	sub("([y])" .. hyphen .. "ye#", "%1ye_#")
	sub("iy", "ey")

	-- A doubled consonant collapses to the consonant plus tashdid.
	sub(cons .. "%1", "%1" .. tashdid)
	-- Adjacent consonants get a sukoon between them; a second pass is needed
	-- because matches cannot overlap within one pass.
	for _ = 1, 2 do
		sub(cons .. cons, "%1" .. jazm .. "%2")
	end

	-- Word-initial â, and hamza sequences.
	sub("#â", "#آ")
	sub("o'", "oؤ")
	sub("e'", "eئ")
	sub("'â", "آ")

	-- Hyphens: a short vowel in front of one gains "h" first. After that, a
	-- hyphen following a dc_consonants character becomes a sukoon, and a
	-- hyphen following anything else becomes a zero-width non-joiner.
	sub("([aeo])" .. hyphen, "%1h-")
	sub(dc .. hyphen .. "â", "%1" .. jazm .. "آ")
	sub(non_dc .. hyphen .. "â", "%1" .. zwnj .. "آ")
	sub(dc .. hyphen .. vow, "%1" .. jazm .. "â%3")
	sub(non_dc .. hyphen .. vow, "%1" .. zwnj .. "â%3")
	sub(dc .. hyphen .. cons, "%1" .. jazm .. "%3")
	sub(non_dc .. hyphen .. cons, "%1" .. zwnj .. "%3")

	-- A word-initial vowel is prefixed with â; a word-final a/e/o gains "h".
	sub("#" .. vow, "#â%1")
	sub("([aeo])#", "%1h#")

	-- try to find ezafe markings
	sub("([aeo]h)(" .. zwnj .. "yeh)#", "%1" .. highhmz)
	sub("([aeo]h)(" .. zwnj .. "âeh)#", "%1" .. highhmz)
	sub("([âu])_ye_#", "%1ye#")
	sub("%_", "")
	-- A word-final apostrophe becomes a standalone hamza.
	sub("(['])#", "ء#")

	-- Map whatever Latin letters remain, one character at a time; characters
	-- without a table entry are left unchanged.
	sub('.', convert_consonants)
	sub('.', convert_vowels)

	-- Strip leftover hyphens and boundary markers.
	sub("[-]", "")
	sub("#", "")
	sub("%[ ", "[") --this prevents weird spacing
	return text
end

-- Return the module table (carrying export.tr) to whatever loads this module.
return export