Jump to content

Module:fa-IPA/harakat

From Wiktionary, the free dictionary

This module can create phonetic, fully vocalized Persian text from a Classical Romanization. It is meant to be used by {{fa-IPA}} to create phonetic spellings for Classical Persian and Dari using traditional vocalization, but it can also be invoked elsewhere with {{xlit}} if needed.

For the equivalent module for Iranian Persian, see {{Module:fa-IPA/harakat-ira}}.


-- Unicode-aware helpers from the ustring library.
local U = mw.ustring.char
local rsubn = mw.ustring.gsub

local export = {}

-- Short-vowel marks (U+064E, U+0650, U+064F).
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F)
-- tashdid is also called shadda, jazm is also called sukoon.
local tashdid, jazm = U(0x651), U(0x652)
local highhmz = U(0x654) -- combining hamza above
local zwnj = U(0x200C) -- zero-width non-joiner
local pitchaccent = U(0x301) -- combining acute accent
local he = "ه"

-- Romanized consonant -> Perso-Arabic letter.
-- STOP: fa-IPA should remove incorrect characters before they get here;
-- if an incorrect character is appearing in the output, check there!
local convert_consonants = {
	-- plain ASCII letters
	b = "ب",
	d = "د",
	f = "ف",
	g = "گ",
	h = he,
	j = "ج",
	k = "ک",
	l = "ل",
	m = "م",
	n = "ن",
	p = "پ",
	q = "ق",
	r = "ر",
	s = "س",
	t = "ت",
	w = "و",
	x = "خ",
	y = "ی",
	z = "ز",
	-- letters carrying a diacritic
	["ḇ"] = "ڤ",
	["ḏ"] = "ذ",
	["č"] = "چ",
	["ğ"] = "غ",
	["š"] = "ش",
	["ž"] = "ژ",
	-- only for Hazaragi
	["ḍ"] = "ڈ",
	["ṭ"] = "ٹ",
	-- apostrophe
	["'"] = "ئ",
}

-- Romanized vowel -> vowel mark; ā maps to a full alif letter instead.
local convert_vowels = {
	a = zabar,
	i = zer,
	u = pesh,
	["ā"] = "ا",
}

-- Character lists that get spliced into "[...]" sets when patterns are built.
local consonants = "bḇβptṭjčxdðḏḍrzžsšğ'fqkglmnhwy"
local vowels = "aiuēīōū" --including ā causes issues
-- characters after which a hyphen is replaced by jazm rather than by a ZWNJ
local dc_consonants = "ādḍðrzžw" .. jazm

--- Turn a romanized string into vocalized Perso-Arabic text.
-- The input is pushed through an ordered chain of substitutions. Many steps
-- rely on markers left by earlier ones ("#" at word edges, "_" around a
-- protected izafa, "-" from the input), so the statement order matters.
-- @param text romanized input string
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the converted string
function export.tr(text, lang, sc)
	-- strip parentheses and pitch-accent marks
	text = rsubn(text, "([%(%)])", "")
	text = rsubn(text, pitchaccent, "")
	text = rsubn(text, " | ", "# | #")
	-- Drop spaces next to commas. This must happen before spaces are turned
	-- into "# #" below: afterwards every space sits between two "#", so
	-- ", " and " ," could never match and a stray space would be left
	-- in front of the closing bracket.
	text = rsubn(text, ", ", ",")
	text = rsubn(text, " ,", ",")
	-- mark word edges with "#"
	text = "##" .. rsubn(text, " ", "# #") .. "##"
	text = rsubn(text, "`", "")
	-- a comma becomes "] ,[" and each bracket is fenced with "#"
	text = rsubn(text, ",", "] ,[")
	text = rsubn(text, "%]", "#]#")
	text = rsubn(text, "%[", "#[#")

	text = rsubn(text, "v", "w")
	-- a free-standing "u" is parked as a placeholder; it is expanded again
	-- near the end of the function
	text = rsubn(text, "#u#", "#ؤ#")
	-- word-final e is rewritten as i-yi; e/o before h or ' become i/u
	text = rsubn(text, "e#", "i-yi#")
	text = rsubn(text, "e([h'])", "i%1")
	text = rsubn(text, "o([h'])", "u%1")
	-- short vowel + h + one non-vowel at the end of a word: jazm after the h
	text = rsubn(text, "([aiu]h)([^"..vowels.."])#", "%1"..jazm.."%2#")
	--prevent izafa from being processed
	text = rsubn(text, "(["..consonants.."])([-])i#", "%1_i_#")
	text = rsubn(text, "([āōū])([-])yi#", "%1_yi_#")
	text = rsubn(text, "([ēīy])([-])yi#", "%1yi_#")
	text = rsubn(text, "īy", "iy")
	-- a doubled consonant becomes consonant + tashdid
	text = rsubn(text, "(["..consonants.."])%1", "%1"..tashdid)
	-- jazm between two adjacent consonants
	text = rsubn(text, "(["..consonants.."])(["..consonants.."])", "%1"..jazm.."%2")
	text = rsubn(text, "(["..consonants.."])(["..consonants.."])", "%1"..jazm.."%2")
	--must be repeated for overlapping patterns
	text = rsubn(text, "#ā", "#آ")
	-- pick the hamza carrier from the neighbouring vowel
	text = rsubn(text, "u'", "uؤ")
	text = rsubn(text, "i'", "iئ")
	text = rsubn(text, "'ā", "آ")
	text = rsubn(text, "'u", "ؤu")
	text = rsubn(text, "'i", "ئi")
	-- short vowel before a hyphen gets an h
	text = rsubn(text, "([aiu])([-])", "%1h-")
	-- hyphen + ā: jazm after a dc_consonants character, ZWNJ otherwise
	text = rsubn(text, "(["..dc_consonants.."])([-])ā", "%1"..jazm.."آ")
	text = rsubn(text, "([^"..dc_consonants.."])([-])ā", "%1"..zwnj.."آ")
	-- pair lonely vowels to an alif
	text = rsubn(text, "(["..dc_consonants.."])([-])(["..vowels.."])", "%1"..jazm.."ā%3")
	text = rsubn(text, "([^"..dc_consonants.."])([-])(["..vowels.."])", "%1"..zwnj.."ā%3")
	-- hyphen + consonant: same jazm / ZWNJ choice
	text = rsubn(text, "(["..dc_consonants.."])([-])(["..consonants.."])", "%1"..jazm.."%3")
	text = rsubn(text, "([^"..dc_consonants.."])([-])(["..consonants.."])", "%1"..zwnj.."%3")
	-- word-initial vowel gets an alif in front of it
	text = rsubn(text, "#(["..vowels.."])", "#ā%1")
	-- any hyphen still left is dropped
	text = rsubn(text, "[-]", "")
	-- word-final short vowel gets an h
	text = rsubn(text, "([aiu])#", "%1h#")
	-- try to find izafa marks and unprocess them
	text = rsubn(text, "([aiu]h)("..zwnj.."yi)h#", "%1"..highhmz.."#")
	text = rsubn(text, "([aiu]h)("..zwnj.."āi)h#", "%1"..highhmz.."#")
	text = rsubn(text, "([āōū])_yi_#", "%1yi#")
	-- word-final apostrophe becomes a stand-alone hamza
	text = rsubn(text, "(['])#", "ء#")
	-- these need to happen last so they are not marked as unpaired consonants
	text = rsubn(text, "ū", "uw")
	text = rsubn(text, "ī", "iy")
	text = rsubn(text, "ē", "y")
	text = rsubn(text, "ō", "w")
	-- expand the placeholder created for a free-standing "u"
	text = rsubn(text, "#ؤ#", "ؤu")
	-- remove the izafa protection markers
	text = rsubn(text, "%_", "")
	-- per-character lookup; characters with no table entry are kept as they are
	text = rsubn(text, '.', convert_consonants)
	text = rsubn(text, '.', convert_vowels)

	-- remove full stops and the "#" word-edge markers
	text = rsubn(text, "[.]", "")
	text = rsubn(text, "#", "")
	text = rsubn(text, "%[".." ", "[") --this prevents weird spacing
	return text
end

return export