-- Module:ain-pron
--
-- From Wiktionary, the free dictionary.


-- Table of functions exported by this module; it is returned at the end of
-- the file.
local export = {}

-- Romanized consonant letters. The apostrophe is listed here as well, so it
-- counts as a consonant wherever this list is consulted.
-- TODO: x /x/ ?
local CONSONANTS = {
    "p", "t", "c", "k",
    "m", "n", "s", "h",
    "w", "r", "y", "'"
}

-- Romanized vowel letters, both plain and with an acute accent.
local VOWELS = {
    "a", "i", "u", "e", "o",
    "á", "í", "ú", "é", "ó",
}

-- Maps each romanized letter to the IPA string used in the phonemic
-- transcription (the one wrapped in /.../ by export.ain_IPA).
-- The apostrophe maps to the empty string, i.e. it is dropped.
local CONVERSION_TABLE = {
    -- Consonants
    ["p"] = "p", ["t"] = "t", ["c"] = "t͡ʃ", ["k"] = "k",
    ["m"] = "m", ["n"] = "n", ["s"] = "s",  ["h"] = "h",
    ["w"] = "w", ["r"] = "ɾ", ["y"] = "j",  ["'"] = "",
    -- Vowels (accented vowels are kept as they are)
    ["a"] = "a", ["i"] = "i", ["u"] = "u", ["e"] = "e", ["o"] = "o",
    ["á"] = "á", ["í"] = "í", ["ú"] = "ú", ["é"] = "é", ["ó"] = "ó"
}

-- Maps each romanized letter to the IPA string used in the phonetic
-- transcription (the one wrapped in [...] by export.ain_IPA).
-- It has the same entries as CONVERSION_TABLE except that the apostrophe
-- becomes a glottal stop: ruunpe -> ruʔunpe / -> teʔeta
local CONVERSION_TABLE_PHONETIC = {
    -- Consonants
    ["p"] = "p", ["t"] = "t", ["c"] = "t͡ʃ", ["k"] = "k",
    ["m"] = "m", ["n"] = "n", ["s"] = "s",  ["h"] = "h",
    ["w"] = "w", ["r"] = "ɾ", ["y"] = "j",  ["'"] = "ʔ",
    -- Vowels (accented vowels are kept as they are)
    ["a"] = "a", ["i"] = "i", ["u"] = "u", ["e"] = "e", ["o"] = "o",
    ["á"] = "á", ["í"] = "í", ["ú"] = "ú", ["é"] = "é", ["ó"] = "ó"
}

-- Replacements for the last letter of a syllable in the phonetic
-- transcription; keys are romanized letters, values are IPA strings.
-- as -> aʃ ( /e.ɾa.mus.ka.ɾe/ -> /e.ɾa.mus.ka.ɾe/ [/e.ɾa.muʃ.ka.ɾe/] )
local SPECIAL_CODA = {
    ["s"] = "ʃ", ["p"] = "p̚", ["k"] = "k̚", ["t"] = "t̚",
}

--- Report whether `item` is one of the values stored in `items`.
-- Declared `local` so that the helper does not leak into the global
-- environment shared with other modules.
-- @param item value to look for (compared with ==)
-- @param items table whose values are searched (array and hash part)
-- @return true if some value of `items` equals `item`, false otherwise
local function in_values(item, items)
    for _, value in pairs(items) do
        if value == item then
            return true
        end
    end
    return false
end

--- Report whether `item` is a key of `items`.
-- Declared `local` so that the helper does not leak into the global
-- environment shared with other modules. A key is present exactly when its
-- value is not nil, so a direct lookup replaces the former linear scan.
-- @param item key to look for
-- @param items table whose keys are checked
-- @return true if `items` has an entry under `item`, false otherwise
local function in_keys(item, items)
    return items[item] ~= nil
end

--- Convert one romanized syllable to its phonemic IPA form.
-- Each character is looked up in CONVERSION_TABLE and the results are
-- joined in order. Examples:
--   yay > jaj
--   mur > muɾ
--   an  > an
--   ka  > ka
-- @param syllable romanized syllable (string)
-- @return the IPA string for the syllable
-- Raises an error naming the offending character when the syllable contains
-- a character that has no entry in CONVERSION_TABLE.
local function convert_syllable(syllable)
    local pieces = {}
    for char in mw.ustring.gmatch(syllable, ".") do
        local ipa = CONVERSION_TABLE[char]
        if ipa == nil then
            -- Fail with a readable message instead of the opaque
            -- "attempt to concatenate a nil value".
            error("Unsupported character '" .. char
                .. "' in Ainu syllable '" .. syllable .. "'")
        end
        pieces[#pieces + 1] = ipa
    end
    return table.concat(pieces)
end

--- Convert one romanized syllable to its phonetic IPA form.
-- A syllable whose first character is not in CONSONANTS is prefixed with an
-- apostrophe, which CONVERSION_TABLE_PHONETIC renders as a glottal stop.
-- When the syllable has more than one character and its last character is a
-- key of SPECIAL_CODA, that character's output is replaced by the
-- SPECIAL_CODA value.
-- @param syllable romanized syllable (string)
-- @return the IPA string for the syllable
-- Raises an error naming the offending character when the syllable contains
-- a character that has no entry in CONVERSION_TABLE_PHONETIC.
local function convert_syllable_phonetic(syllable)
    if not in_values(mw.ustring.sub(syllable, 1, 1), CONSONANTS) then
        syllable = "'" .. syllable
    end

    -- One output piece per input character. Keeping the pieces separate lets
    -- the coda be addressed by character position: the converted string can
    -- be longer than the syllable ("c" becomes the three code points "t͡ʃ"),
    -- so positions in the two strings must not be mixed.
    local pieces = {}
    local last_char
    for char in mw.ustring.gmatch(syllable, ".") do
        local ipa = CONVERSION_TABLE_PHONETIC[char]
        if ipa == nil then
            -- Fail with a readable message instead of the opaque
            -- "attempt to concatenate a nil value".
            error("Unsupported character '" .. char
                .. "' in Ainu syllable '" .. syllable .. "'")
        end
        pieces[#pieces + 1] = ipa
        last_char = char
    end

    -- A lone character is never treated as a coda.
    if #pieces > 1 then
        local coda = SPECIAL_CODA[last_char]
        if coda ~= nil then
            pieces[#pieces] = coda
        end
    end

    return table.concat(pieces)
end

-- local function print_groups(group_ids, temp)
--     local str_buffer = ""
--     for i = 1, #temp do
--         if group_ids[i] ~= nil then
--             str_buffer = str_buffer .. group_ids[i]
--         else
--             str_buffer = str_buffer .. "X"
--         end
--     end
--     print("group_indicies: " .. str_buffer)
--     print("group_contents: " .. temp)
-- end

--- Split a romanized word into syllables.
-- Every vowel (see VOWELS) is the nucleus of its own syllable. A consonant
-- (see CONSONANTS) directly before a vowel is the onset of that vowel's
-- syllable. Every other character joins the syllable of the character before
-- it; characters that precede the first onset or vowel join the first
-- syllable.
-- @param romanized romanized word (string)
-- @return array of syllable strings in order; for an empty word the array
--         holds a single empty string
local function split_syllables(romanized)
    local chars = {}
    for char in mw.ustring.gmatch(romanized, ".") do
        chars[#chars + 1] = char
    end

    -- Number the syllables by their vowels, marking nucleus and onset.
    local group_ids = {}
    local syllable_count = 0
    for i, char in ipairs(chars) do
        if in_values(char, VOWELS) then
            syllable_count = syllable_count + 1
            -- chars[0] is nil, which is never a consonant.
            if in_values(chars[i - 1], CONSONANTS) then
                group_ids[i - 1] = syllable_count
            end
            group_ids[i] = syllable_count
        end
    end

    -- Fill codas: unmarked characters inherit the syllable on their left.
    -- Unmarked characters at the very start have no left neighbour and are
    -- put into the first syllable, so that no empty syllable is produced.
    for i = 1, #chars do
        if group_ids[i] == nil then
            group_ids[i] = group_ids[i - 1] or 1
        end
    end

    -- Cut wherever the syllable number changes. The scan includes the last
    -- character, so a final syllable made of a single vowel ("ae" -> a.e)
    -- is split off as well.
    local syllables = {}
    local head = 1
    for i = 2, #chars do
        if group_ids[i] ~= group_ids[i - 1] then
            syllables[#syllables + 1] = table.concat(chars, "", head, i - 1)
            head = i
        end
    end
    syllables[#syllables + 1] = table.concat(chars, "", head, #chars)

    return syllables
end

--- Convert a romanized word to its phonemic transcription.
-- The word is split into syllables, each syllable is converted with
-- convert_syllable, and the results are joined with ".".
-- @param romanized romanized word (string)
-- @return the transcription, without the surrounding slashes
local function do_convert(romanized)
    local converted = {}
    for index, syllable in ipairs(split_syllables(romanized)) do
        converted[index] = convert_syllable(syllable)
    end
    return table.concat(converted, ".")
end

-- Pattern/replacement pairs applied, in this order, to the "."-joined
-- phonetic syllables. Patterns are mw.ustring patterns ("%." is a literal
-- syllable separator).
-- TODO: Phonetic Transcription: konkane /kon.ka.ne/ [koŋ.ɡa.ne] / ʔ
local PHONETIC_RULES = {
    {"n%.k", "ŋ.k"},
    {"m%.k", "ŋ.k"},
    {"n%.m", "m.m"},
    {"n%.p", "m.p"},
    {"si", "ʃi"},
    {"u%.ʔa", "u.wa"},
    {"u%.ʔe", "u.we"},
    {"u%.ʔo", "u.wo"},
    {"i%.ʔa", "i.ja"},
    {"i%.ʔe", "i.je"},
    {"i%.ʔo", "i.jo"},
    {"ɾ%.n", "n.n"},
    {"ɾ%.t", "t.t"},
    {"ɾ%.ɾ", "n.ɾ"},
}

--- Convert a romanized word to its phonetic transcription.
-- The word is split into syllables, each syllable is converted with
-- convert_syllable_phonetic, the results are joined with ".", and finally
-- the PHONETIC_RULES substitutions are applied in order.
-- @param romanized romanized word (string)
-- @return the transcription, without the surrounding brackets
local function convert_phonetic(romanized)
    local converted = {}
    for index, syllable in ipairs(split_syllables(romanized)) do
        converted[index] = convert_syllable_phonetic(syllable)
    end

    local result = table.concat(converted, ".")
    for _, rule in ipairs(PHONETIC_RULES) do
        -- gsub also returns the match count; keep only the string.
        result = mw.ustring.gsub(result, rule[1], rule[2])
    end

    return result
end


-- local function valid_ainu_word(word)
--     -- TODO:
-- end
--- Template entry point: build the IPA pronunciation output for Ainu words.
-- Reads the positional arguments of the parent frame (a list that may have
-- holes). For each position, an absent or empty argument is replaced by the
-- current page title. The word is lower-cased, every character other than a
-- letter or an apostrophe is removed, and the phonemic form is emitted as
-- /.../; the phonetic form follows as [...] only when it differs.
-- @param frame frame object passed by #invoke
-- @return whatever Module:IPA's format_IPA_full returns for the items
function export.ain_IPA(frame)
    local args = require("Module:parameters").process(
        frame:getParent().args,
        { [1] = {list = true, allow_holes = true} }
    )
    local words = args[1]

    local items = {}

    -- FIXME: IPA(?): /hi.oj.oj/, [hi.oj.oj] → IPA(?): /hi.oj.oj/ [hi.oj.oj]
    -- At least one pass is made, so a call without arguments still
    -- transcribes the page title.
    local last_index = math.max(words.maxindex, 1)
    for index = 1, last_index do
        -- TODO: error("") if not valid Ainu word
        local romanized = words[index]
        if not (romanized and romanized ~= "") then
            romanized = mw.title.getCurrentTitle().text
        end

        -- Normalize: lower-case, then drop special characters such as
        -- "-", ".", "=" (only the first result of gsub is kept).
        local lowered = mw.ustring.lower(romanized)
        local cleaned = mw.ustring.gsub(lowered, "[^%u%l']", "")

        local phonemic = do_convert(cleaned)
        local phonetic = convert_phonetic(cleaned)

        items[#items + 1] = {pron = "/" .. phonemic .. "/"}
        if phonetic ~= phonemic then
            items[#items + 1] = {pron = "[" .. phonetic .. "]"}
        end
    end

    local lang = require("Module:languages").getByCode("ain")
    return require("Module:IPA").format_IPA_full { lang = lang, items = items }
end

--- Entry point returning the phonemic conversion of the first argument.
-- frame.args[1] is handed to do_convert unchanged; no lower-casing or
-- character filtering is done here.
-- @param frame frame object whose args[1] holds the romanized word
-- @return the result of do_convert for that word
function export.convert(frame)
    return do_convert(frame.args[1])
end

return export