-- Module:mr-IPA
-- From Wiktionary, the free dictionary


local export = {}

local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")

local find = m_str_utils.find
local gcodepoint = m_str_utils.gcodepoint
local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local u = m_str_utils.char

local lang = require("Module:languages").getByCode("mr")
local sc = require("Module:scripts").getByCode("Deva")

-- Lookup table from transliteration units to the IPA text that replaces them.
-- export.toIPA passes this table as the replacement argument of its final
-- gsub; units without an entry are left as they are.
local correspondences = {
	-- consonants
	["ṅ"] = "ŋ",
	["g"] = "ɡ",
	["c"] = "t͡ɕ",
	["j"] = "d͡ʑ",
	["ċ"] = "t͡s",
	["j̈"] = "d͡z",
	["ñ"] = "n",
	["ṭ"] = "ʈ",
	["ḍ"] = "ɖ",
	["ṇ"] = "ɳ",
	["t"] = "t̪",
	["d"] = "d̪",
	["y"] = "j",
	["r"] = "ɾ",
	["v"] = "ʋ",
	["l"] = "l̪",
	["ḷ"] = "ɭ̆",
	["ś"] = "ɕ",
	["ṣ"] = "ʂ",
	["h"] = "ɦ",
	["ṛ"] = "ɽ",
	["ž"] = "z",
	["ḻ"] = "ɭ",
	["ġ"] = "ɡ",
	["q"] = "k",
	["x"] = "kʰ",
	["ṉ"] = "n",
	["ṟ"] = "ɾ",

	-- vowels
	["a"] = "ə",
	["ā"] = "a",
	["i"] = "i",
	["ī"] = "i",
	["o"] = "o",
	["e"] = "e",
	["u"] = "u",
	["ū"] = "u",
	["ŏ"] = "ɔ",
	["ĕ"] = "æ",

	-- vowels written with a tilde
	["ũ"] = "ũ",
	["õ"] = "õ",
	["ã"] = "ə̃",
	["ā̃"] = "ã",

	-- miscellaneous; a space maps to the empty string
	["ॐ"] = "oːm",
	["ḥ"] = "ʰ",
	[" "] = "",
}

-- Maps each Devanagari letter on the left to the letter that export.toIPA
-- substitutes for it when the same letter is doubled across a virama.
local deaspirate = {
	["ख"] = "क",
	["घ"] = "ग",
	["छ"] = "च",
	["झ"] = "ज",
	["ठ"] = "ट",
	["ढ"] = "ड",
	["थ"] = "त",
	["ध"] = "द",
	["फ"] = "प",
	["भ"] = "ब",
}

-- Character inventories (in transliteration) from which the patterns below are built.
local vowels = "aāiīuūoŏĕɔɛeæ"
local weak_h_c = "gjdḍṇbṛnmrṟlv"
-- One weak_h_c consonant (captured) immediately followed by "h".
local weak_h = ("([%s])h"):format(weak_h_c)
-- A single captured consonant out of k, c, ċ, t, ṭ, p.
local aspirate = "([kcċtṭp])"
-- Three captures: a vowel run, a run of characters that are neither vowels
-- nor ".", and another vowel run.
local syllabify_pattern = ("([%s]+)([^%s%%.]+)([%s]+)"):format(vowels, vowels, vowels)

--- Split a string into a list of consonant units.
-- Characters are consumed one at a time; a character is appended to the unit
-- being built for as long as the result still matches one of the two
-- patterns below, otherwise the unit is stored and a new one is started.
-- @param text string to split
-- @return array of strings; the first element is "" when the first
--         character of `text` does not match either pattern
local function find_consonants(text)
	-- a single consonant letter, optionally followed by a combining diaeresis
	local single = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉċ]̈?$"
	-- a consonant letter (optionally with diaeresis) followed by "h"
	local with_h = "^[kgcjṭḍṇtdpbṛṟnmrlvj]̈?h$"

	local units = {}
	local pending = ""
	-- The appended space never matches, so it flushes the last unit.
	for codepoint in gcodepoint(text .. " ") do
		local char = u(codepoint)
		local candidate = pending .. char
		if find(candidate, single) or find(candidate, with_h) then
			pending = candidate
		else
			units[#units + 1] = pending
			pending = char
		end
	end
	return units
end

--- Insert "." syllable breaks into consonant clusters that sit between vowels.
-- For every match of `syllabify_pattern` the middle capture is split with
-- find_consonants; the "." goes before the only unit when there is one unit,
-- and before the second unit when there are several.
-- @param text transliterated string
-- @return the string with "." inserted
local function syllabify(text)
	local function split_cluster(before, cluster, after)
		-- Declared local: the original assigned to an undeclared name and so
		-- created a global variable on every call.
		local units = find_consonants(cluster)
		table.insert(units, #units > 1 and 2 or 1, ".")
		return before .. table.concat(units) .. after
	end

	-- Two passes: a match consumes its trailing vowel run, so that run cannot
	-- also start the next match within the same pass.
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, split_cluster)
	end
	return text
end

-- Letters whose replacement is the letter itself: register each one in
-- `correspondences` as mapping to itself.
local identical = "knlsfzθ"
for letter in gmatch(identical, ".") do
	correspondences[letter] = letter
end

--- Transliterate `text` with the language object's transliterate method.
-- Only the first value returned by lang:transliterate is passed on.
local function transliterate(text)
	local translit = lang:transliterate(text)
	return translit
end

--- Build a link to `term` via Module:links' full_link, using this module's
-- language and script objects.
-- @param term the term to link
-- @return whatever full_link returns
function export.link(term)
	local m_links = require("Module:links")
	return m_links.full_link({ term = term, lang = lang, sc = sc })
end

--- Convert a term to an IPA string.
-- The term is transliterated, rewritten by an ordered series of
-- substitutions, syllabified, and finally mapped unit by unit through
-- `correspondences`. The order of the substitutions matters.
-- @param text the term to convert (the first step operates on Devanagari letters)
-- @param phonetic when truthy, the additional rules guarded by `phonetic`
--        below are applied as well
-- @return the IPA string, without surrounding slashes or brackets
-- Raises an error when the transliteration step yields nothing.
function export.toIPA(text, phonetic)
	--fix how aspiration is represented
	-- When the same letter from the set appears on both sides of an
	-- (optionally nukta-marked) virama, the first one is swapped for its
	-- `deaspirate` counterpart.
	text = gsub(text, '([खघछझठढथधफभ])(़?्)([खघछझठढथधफभ])', function(a, b, c)
		if a == c then return deaspirate[a] .. b .. c end
		return a .. b .. c
	end)

	local translit = transliterate(text)
	if not translit then
		error('The term "' .. text .. '" could not be transliterated (is it in the correct script?).')
	end
	if phonetic then
		translit = gsub(translit, 'ts', 'ċh') -- ts realization
		translit = gsub(translit, '([^' .. vowels .. '])aha([^' .. vowels .. '])', '%1a%2') -- schwa dropping
		translit = gsub(translit, '([mnlvrdbj])ah([aāuūoŏĕɔɛeæ])', '%1h%2') -- schwa dropping
		translit = gsub(translit, '([mnlvrdbj])([' .. vowels .. '])h([' .. vowels .. '])', '%1h%2%3') -- murmur + aspiration rules
		translit = gsub(translit, '([^' .. vowels .. '])ah([' .. vowels .. '])', '%1%2') -- schwa + h dropping
		translit = gsub(translit, '([^' .. vowels .. '])([' .. vowels .. '])ha([^' .. vowels .. '])', '%1%2%3') -- schwa + h dropping 2
		translit = gsub(translit, '([^' .. vowels .. '])([' .. vowels .. '])h([' .. vowels .. '])', '%1%2%3') -- h dropping
	end

	-- vowels
	translit = gsub(translit, "͠", "̃")
	translit = gsub(translit, 'a(̃?)i', 'əi%1')
	translit = gsub(translit, 'a(̃?)u', 'əu%1')
	translit = gsub(translit, "%-", ".") -- a hyphen becomes a syllable break
	translit = gsub(translit, "ŕ", "ru")

	-- schwa force
	translit = gsub(translit, "%*", "a")

	translit = syllabify(translit)

	-- clusters
	translit = gsub(translit, 'ndny', 'ndñ')
	translit = gsub(translit, 'dny', 'dñ')

	if phonetic then
		translit = gsub(translit, 'ts', 't͡sʰ')

		translit = gsub(translit, '([' .. weak_h_c .. '])āh', '%1hā')
		translit = gsub(translit, aspirate .. "h", '%1ʰ')
		translit = gsub(translit, weak_h, '%1ʱ')
		-- an "h" separated from its consonant by a syllable break pulls the
		-- consonant across the break
		translit = gsub(translit, '([' .. weak_h_c .. '])%.h', '.%1ʱ')
		translit = gsub(translit, aspirate .. '%.h', '.%1ʰ')

		translit = gsub(translit, '([^' .. vowels .. '])([' .. vowels .. '])h%.', '%1%2.') -- more h dropping
		translit = gsub(translit, '([^' .. vowels .. '])([' .. vowels .. '])h$', '%1%2') -- more h dropping
	end

	translit = gsub(translit, "%.ː", "ː.")

	-- aspiration
	translit = gsub(translit, "([kgṅcjñṭḍṇtdnpbmyrlvśṣsqxġzžḻṛṟfθṉċj̈])h", "%1ʰ")
	translit = gsub(translit, "([gjdḍṇbṛnmrṟlvj̈])ʰ", "%1ʱ")
	translit = gsub(translit, "([gjdḍṇbṛnmrṟlvj̈])%.h", ".%1ʱ")

	-- Map every character (together with a following combining diaeresis,
	-- if any) through the table; characters without an entry are kept.
	local result = gsub(translit, ".̈?", correspondences)

	-- formatting
	result = gsub(result, "ː̃", "̃ː")
	-- The dot is escaped so that only a literal syllable break is matched;
	-- unescaped, "." matched any character and replaced it with ".".
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%. ", " ")
	result = gsub(result, "%.$", "")

	if phonetic then
		-- lengthening of i/u in final syllables (Dhongde & Wali p. 9)
		result = gsub(result, "i([^%.]*)$", "iː%1")
		result = gsub(result, "u([^%.]*)$", "uː%1")
	end

	return result
end

--- Template entry point: render the IPA line for one or more terms.
-- Terms are taken from the parent frame's positional parameters; blank
-- parameters are skipped. When no usable term is supplied the current page
-- title is used instead. Each term yields a /broad/ transcription, followed
-- by a [narrow] one when the two differ.
-- @param frame the frame object passed to the invocation
-- @return the value returned by Module:IPA's format_IPA_full
function export.make(frame)
	local args = frame:getParent().args
	local pagetitle = mw.title.getCurrentTitle().text

	local terms = {}
	if args[1] then
		for _, item in ipairs(args) do
			if item ~= "" then
				terms[#terms + 1] = item
			end
		end
	end
	-- Also covers the case where parameters were given but all were blank,
	-- which previously left the list empty and rendered nothing.
	if #terms == 0 then
		terms[1] = pagetitle
	end

	local results = {}
	for _, Marathi in ipairs(terms) do
		local broad = export.toIPA(Marathi, false)
		local narrow = export.toIPA(Marathi, true)
		results[#results + 1] = { pron = "/" .. broad .. "/" }
		if broad ~= narrow then
			results[#results + 1] = { pron = "[" .. narrow .. "]" }
		end
	end

	return m_IPA.format_IPA_full { lang = lang, items = results }
end

return export