Module:aii-conj

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Table of functions exposed by this module; it is filled in below and
-- returned at the end of the file.
local export = {}

-- U(codepoint) is used below to build the strings for individual diacritics.
local U = require("Module:string/char")
-- Short aliases for the Unicode-aware mw.ustring functions used in this module.
local rsub = mw.ustring.gsub
local rmatch = mw.ustring.match
local rlen = mw.ustring.len

-- Vowel Diacritics
-- Each constant is a one-character string built from its code point.
local KHWASA_RWASA = U(0x73C)
local KHWASA_RWAKHA = U(0x73F)
local PTAKHA = U(0x732)
local ZQAPA = U(0x735)
local ZLAMA_KIRYA = U(0x738)
local ZLAMA_YAREEKHA = U(0x739)
-- All vowel diacritics concatenated into one string.
-- NOTE(review): not referenced anywhere else in the visible code.
local VOWEL_DIACRITICS = KHWASA_RWASA .. KHWASA_RWAKHA .. PTAKHA .. ZQAPA .. ZLAMA_KIRYA .. ZLAMA_YAREEKHA

-- Non-vowel Diacritics
local COMBINING_TILDE_BELOW = U(0x330)
local COMBINING_TILDE_ABOVE = U(0x303)
local QUSHSHAYA = U(0x741)
local RUKKAKHA = U(0x742)
local COMBINING_BREVE_BELOW = U(0x32E)
local TALQANA_ABOVE = U(0x747)
-- All non-vowel diacritics concatenated; create_cg() places this string inside
-- a character class to allow one optional mark after a letter.
local NON_VOWEL_DIACRITICS = COMBINING_TILDE_BELOW .. COMBINING_TILDE_ABOVE .. QUSHSHAYA .. RUKKAKHA .. COMBINING_BREVE_BELOW .. TALQANA_ABOVE

-- Consonants and Vowel Helpers
-- These strings are spliced into pattern character classes, so each one is a
-- set of letters rather than a word.
local CONSONANTS = 'ܦܒܬܛܕܟܓܩܣܨܙܫܚܥܗܡܢܪܠ'
local ALAP = 'ܐ'
local WAW = 'ܘ'
local YUDH = 'ܝ'
local VOWEL_HELPERS = ALAP .. WAW .. YUDH
-- Every base letter; export._conj() removes everything that is NOT in this set
-- in order to count the letters of a verb.
local LETTERS = CONSONANTS .. VOWEL_HELPERS

-- Special Cases
-- Maps an exact verb string to {template title, template argument 1, ...}.
-- export._conj() consults this table before trying any pattern; the first
-- element becomes the template title and the remaining elements become the
-- template arguments (an entry with only a title yields no arguments).
local special_cases = {
    -- These are cases where the non-past, 3rd person, singular, masculine form of the verb cannot be used to reliably infer the other forms.
    -- Though the continuous form is always reliable, the verbs on Wiktionary have been normalized to the former.
    --
    -- override matching Template:aii-conj-verb/G-2i since medial alap is strong
    -- NOTE(review): the corresponding entry in `patterns` is named 'G-weak-2i'; confirm which template name this comment means.
    ['ܟܵܐܹܒ'] = {'aii-conj-verb/G-strong', 'ܟ', 'ܐ', 'ܒ'},
    ['ܛܵܐܹܒ݂'] = {'aii-conj-verb/G-strong', 'ܛ', 'ܐ', 'ܒ݂'},
    ['ܫܵܐܹܠ'] = {'aii-conj-verb/G-strong', 'ܫ', 'ܐ', 'ܠ'},
    ['ܣܵܐܹܢ'] = {'aii-conj-verb/G-strong', 'ܣ', 'ܐ', 'ܢ'},
    -- no matches since medial alap is strong and first radical is yudh
    ['ܝܵܐܹܒ݂'] = {'aii-conj-verb/G-strong', 'ܝ', 'ܐ', 'ܒ݂'},

    -- Irregular Conjugations
    -- Title only: these templates are expanded without any arguments.
    ['ܗܵܘܹܐ'] = {'aii-conj-haweh'},
    ['ܐܵܬ݂ܹܐ'] = {'aii-conj-verb/atheh'},
    ['ܐܵܙܹܠ'] = {'aii-conj-verb/azel'},
    ['ܝܵܗ݇ܒ݂ܹܠ'] = {'aii-conj-verb/yavel'},
}

--- Build a capture-group pattern fragment for a single letter.
-- The fragment captures one character out of `characters`, together with at
-- most one trailing non-vowel diacritic.
-- @param characters string of letters to place inside the character class
-- @return the pattern fragment as a string
local function create_cg(characters)
    local letter_class = '[' .. characters .. ']'
    local optional_mark = '[' .. NON_VOWEL_DIACRITICS .. ']?'
    return table.concat({ '(', letter_class, optional_mark, ')' })
end

--- Match `text` against `pattern` and collect the captures in a list.
-- Only the first five values returned by the match are considered; values
-- that are nil or false are left out.
-- @param text string to test
-- @param pattern mw.ustring pattern
-- @return list of captured values (empty when the pattern does not match)
local function match_groups(text, pattern)
    local captured = { rmatch(text, pattern) }
    local groups = {}

    for index = 1, 5 do
        local value = captured[index]
        if value then
            groups[#groups + 1] = value
        end
    end

    return groups
end

-- https://en.wiktionary.org/wiki/Category:Assyrian_Neo-Aramaic_verb_inflection-table_templates
--
-- Each entry is {template suffix, letter count, pattern}:
--   * template suffix: appended to 'aii-conj-verb/' to form the template title
--   * letter count: number of characters the verb must have once everything
--     outside LETTERS has been removed
--   * pattern: anchored mw.ustring pattern; its captures become the template
--     arguments (match_groups() keeps at most five captures)
-- export._conj() requires that exactly one entry matches a given verb.
local patterns = {
    {'C-strong', 4, '^ܡ' .. PTAKHA .. create_cg(LETTERS) .. create_cg(CONSONANTS .. ALAP .. WAW) .. ZLAMA_KIRYA .. create_cg(CONSONANTS) .. '$'},
    {'C-weak-2d-3i', 3, '^ܡ' .. PTAKHA .. create_cg(LETTERS) .. ZLAMA_YAREEKHA .. ALAP .. '$'},
    {'C-weak-3i', 4, '^ܡ' .. PTAKHA .. create_cg(LETTERS) .. create_cg(LETTERS) .. ZLAMA_YAREEKHA .. ALAP .. '$'},
    {'C-weak-d', 3, '^ܡ' .. PTAKHA .. create_cg(CONSONANTS) .. ZLAMA_KIRYA .. create_cg(CONSONANTS) .. '$'},
    {'C-weak-m', 4, '^ܡ' .. PTAKHA .. create_cg(LETTERS) .. YUDH .. ZLAMA_KIRYA .. create_cg(CONSONANTS) .. '$'},

    {'D-strong', 4, '^ܡ' .. create_cg(LETTERS) .. PTAKHA .. create_cg(LETTERS) .. ZLAMA_KIRYA .. create_cg(LETTERS) .. '$'},
    {'D-weak-3i', 4, '^ܡ' .. create_cg(LETTERS) .. PTAKHA .. create_cg(CONSONANTS .. WAW) .. ZLAMA_YAREEKHA .. ALAP .. '$'},

    {'G-strong', 3, '^' .. create_cg(CONSONANTS .. YUDH) .. ZQAPA .. create_cg(CONSONANTS .. WAW) .. ZLAMA_YAREEKHA .. create_cg(CONSONANTS) .. '$'},
    {'G-weak-1i', 3, '^' .. ALAP .. ZQAPA .. create_cg(CONSONANTS .. ALAP) .. ZLAMA_YAREEKHA .. create_cg(CONSONANTS) .. '$'},
    {'G-weak-2i', 3, '^' .. create_cg(CONSONANTS) .. ZQAPA .. '[' .. ALAP .. YUDH .. ']' .. ZLAMA_YAREEKHA .. create_cg(CONSONANTS) .. '$'},
    {'G-weak-3i', 3, '^' .. create_cg(CONSONANTS .. ALAP .. YUDH) .. ZQAPA .. create_cg(LETTERS) .. ZLAMA_YAREEKHA .. ALAP .. '$'},
    {'Gt-strong', 5, '^ܡ' .. ZLAMA_KIRYA .. 'ܬ' .. RUKKAKHA .. create_cg(CONSONANTS) .. create_cg(CONSONANTS) .. ZLAMA_YAREEKHA .. create_cg(CONSONANTS .. ALAP) .. '$'},

    -- Five capture groups: the maximum that match_groups() collects.
    {'Penta-strong', 6, '^ܡ' .. create_cg(LETTERS) .. PTAKHA .. create_cg(CONSONANTS) .. create_cg(CONSONANTS) .. create_cg(LETTERS) .. ZLAMA_KIRYA .. create_cg(CONSONANTS) .. '$'},

    {'Q-strong', 5, '^ܡ' .. create_cg(LETTERS) .. PTAKHA .. create_cg(CONSONANTS .. WAW .. YUDH) .. create_cg(CONSONANTS .. ALAP .. WAW) .. ZLAMA_KIRYA .. create_cg(CONSONANTS) .. '$'},
    {'Q-weak', 5, '^ܡ' .. create_cg(LETTERS) .. PTAKHA .. create_cg(CONSONANTS .. WAW) .. create_cg(CONSONANTS) .. ZLAMA_YAREEKHA .. ALAP .. '$'},
    -- The literal prefix contributes three letters to the count of 6; the
    -- remaining three are captured.
    {'Qi-strong', 6, '^ܡܸܫܬܲ' .. create_cg(CONSONANTS) .. create_cg(CONSONANTS) .. ZLAMA_KIRYA .. create_cg(CONSONANTS) .. '$'},
}

--- Work out which conjugation template applies to a verb.
--
-- Given the non-past, 3rd person, singular, masculine form of a verb, return
-- the template name and the arguments to that template which generate the
-- other forms.
--
-- @param aii_v string: the verb form, including its diacritics
-- @return table with two fields:
--           title - name of the conjugation template
--           args  - list of arguments for that template (may be empty)
-- Raises an error when `aii_v` is missing or empty, when no pattern matches,
-- or when more than one pattern matches.
function export._conj(aii_v)
    -- Fail early with a readable message instead of an error raised from
    -- inside mw.ustring when the argument was omitted.
    if type(aii_v) ~= 'string' or aii_v == '' then
        error('aii-conj: expected the non-past, 3rd person, singular, masculine form of a verb as the first argument')
    end

    -- Hand-maintained overrides take precedence over pattern matching.
    local special_case = special_cases[aii_v]
    if special_case then
        return {
            title = special_case[1],
            -- Fresh copy of everything after the title; this is an empty
            -- table for irregular verbs whose entry holds only a title.
            args = {unpack(special_case, 2)},
        }
    end

    -- Number of base letters in the verb (everything outside LETTERS is
    -- dropped). The extra parentheses discard gsub's second return value.
    local num_letters = rlen((rsub(aii_v, '[^' .. LETTERS .. ']', '')))

    local matched_patterns = {}
    for _, pattern_tuple in ipairs(patterns) do
        local pattern, num_atwateh, regex = pattern_tuple[1], pattern_tuple[2], pattern_tuple[3]

        -- Only run the pattern when the letter count can match at all.
        if num_letters == num_atwateh then
            local args = match_groups(aii_v, regex)
            if #args > 0 then
                table.insert(matched_patterns, {
                    title = 'aii-conj-verb/' .. pattern,
                    args = args
                })
            end
        end
    end

    if #matched_patterns == 0 then
        error(aii_v .. ' matched no existing conjugation patterns')
    elseif #matched_patterns == 1 then
        return matched_patterns[1]
    else
        error(aii_v .. ' matched multiple patterns - it should only match one conjugation pattern')
    end
end

--- Template entry point: expand the conjugation template for a verb.
--
-- @param frame Scribunto frame object; frame.args[1] is the non-past,
--        3rd person, singular, masculine form of the verb
-- @return the expanded wikitext of the selected conjugation template
-- Errors raised by export._conj() propagate to the caller.
function export.conj(frame)
    local aii_v = frame.args[1]

    -- Positional #invoke arguments keep their surrounding whitespace, which
    -- would prevent both the special-case lookup and the anchored patterns
    -- from matching, so strip it here.
    if aii_v then
        aii_v = mw.text.trim(aii_v)
    end

    local result = export._conj(aii_v)
    return frame:expandTemplate{ title = result.title, args = result.args }
end

return export