Module:ckb-translit
Appearance
- The following documentation is located at Module:ckb-translit/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Central Kurdish language text. It is also used to transliterate Gurani and Northern Kurdish.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:ckb-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
-- Authors: JavaScript ئاسۆ; Lua Ghybu, Calak
local export = {}
local m_str_utils = require("Module:string utilities")
local find = m_str_utils.find
local gsub = m_str_utils.gsub
local sub = m_str_utils.sub
local U = m_str_utils.char
-- Single-character lookup used as the replacement table of a gsub call:
-- each key is one source character, each value is the Latin text that
-- replaces it. An empty string deletes the character from the output.
local mapping = {
	-- letters
	["ا"] = "a",
	["ب"] = "b",
	["چ"] = "ç",
	["ج"] = "c",
	["د"] = "d",
	["ە"] = "e",
	["ێ"] = "ê",
	["ف"] = "f",
	["گ"] = "g",
	["ھ"] = "h",
	["ه"] = "h",
	["ح"] = "ḧ",
	["ژ"] = "j",
	["ک"] = "k",
	["ڵ"] = "ll",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["ۆ"] = "o",
	["پ"] = "p",
	["ق"] = "q",
	["ر"] = "r",
	["ڕ"] = "r",
	["س"] = "s",
	["ش"] = "ş",
	["ت"] = "t",
	["ڤ"] = "v",
	["خ"] = "x",
	["غ"] = "ẍ",
	["ز"] = "z",
	["ئ"] = "",
	["ع"] = "'",

	-- characters that produce no output
	[U(0x200C)] = "", -- ZWNJ (zero-width non-joiner)
	["ـ"] = "", -- kashida, no sound

	-- numerals
	["١"] = "1", ["٢"] = "2", ["٣"] = "3", ["٤"] = "4", ["٥"] = "5",
	["٦"] = "6", ["٧"] = "7", ["٨"] = "8", ["٩"] = "9", ["٠"] = "0",

	-- Persian variants of the numerals
	["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
	["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
}
-- Punctuation lookup, used as the replacement table of a gsub call.
-- Keep exactly one entry per line.
local punctuation = {
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- opening quotation mark
	["»"] = "”", -- closing quotation mark
	["٪"] = "%", -- percent sign
	["؉"] = "‰", -- per mille sign
	["٫"] = ".", -- decimal separator
	["٬"] = ",", -- thousands separator
}
-- translit
-- Transliterates one whitespace-free token.
--
-- Steps, in order (the rewrite rules are order-dependent):
--   1. map punctuation through the `punctuation` table;
--   2. detach ONE trailing punctuation character, re-attached at the end;
--   3. normalise heh + ZWNJ and the sukun/kasra diacritics;
--   4. resolve 'و' and 'ی' as consonant (w/y) or vowel (u/û/î) from context;
--   5. map the remaining characters through the `mapping` table;
--   6. insert the unwritten vowel "i" into consonant clusters.
--
-- @param word string: the token to transliterate.
-- @return string: the transliterated token.
local function tr_word(word)
	word = gsub(word, '.', punctuation)

	-- Detach a single trailing punctuation character so that it does not
	-- take part in the end-of-word ('$') rules below.
	local trailing_punct = '[%.%!،؛»«٪؉٫٬%p]$'
	local ponct = ''
	if find(word, trailing_punct) then
		ponct = sub(word, -1)
		word = gsub(word, trailing_punct, '')
	end

	-- Correct Unicode for the letter ە:
	--   U+0647 (Arabic letter heh) + U+200C (zero-width non-joiner)
	--   ↓
	--   U+06D5 (Arabic letter ae)
	-- The sequence is built from code points because the ZWNJ is invisible
	-- and is easily lost from a string literal; without it every plain heh
	-- would be rewritten and the heh entry of `mapping` could never apply.
	word = gsub(word, U(0x647) .. U(0x200C), U(0x6D5))

	-- diacritics (also built from code points: they are combining marks)
	word = gsub(word, U(0x652), "i") -- U+0652, Arabic sukun
	word = gsub(word, U(0x650), "i") -- U+0650, Arabic kasra

	--managing 'و' and 'ی'
	word = gsub(word, 'و([iاێۆە])', "w%1") --و + vowel => w (e.g. wan)
	word = gsub(word, 'ی([iاێۆە])', "y%1") --ی + vowel => y (e.g. yas)
	word = gsub(word, '([iاێۆە])و', "%1w") --vowel + و => w (e.g. kew)
	word = gsub(word, '([iاێۆە])ی', "%1y") --vowel + ی => y (e.g. bey)
	word = gsub(word, '^و$', "û") --the whole word is 'و' => û (=and)
	word = gsub(word, '([^ء-يٱ-ەiwy])و', "%1w") --non-letter + 'و' => w (e.g. wtar)
	word = gsub(word, '^و', "w") --first 'و' => w (e.g. wtar)
	word = gsub(word, 'یو', "îw") --'ی' + 'و' => îw (e.g. nîw)
	word = gsub(word, '([^و])یی', "%1îy") --'ی' + 'ی' => îy (e.g. kanîy)
	word = gsub(word, 'وی', "uy") --'و' + 'ی' => uy (e.g. buyn)
	word = gsub(word, 'وو', "û") --'و' + 'و' => û (e.g. bû)
	word = gsub(word, 'ی', "î") --any remaining 'ی' is a vowel
	word = gsub(word, 'و', "u") --any remaining 'و' is a vowel
	word = gsub(word, 'uu', "û") --'و' + 'و' => û (e.g. bû)

	-- These two must run before the table lookup, which would otherwise
	-- turn every 'ڕ' into "r" and drop every 'ئ'.
	word = gsub(word, '([ء-يٱ-ەiîuûwy])ڕ', "%1rr") --when 'ڕ' not at the beginning of a word => rr
	word = gsub(word, '([ء-يٱ-ەiîuûwy])ئ', "%1'") --when 'ئ' not at the beginning of a word => '

	word = gsub(word, '.', mapping)

	--insert i where applicable
	word = gsub(word, 'll', "Ľ") -- temporary conversion to avoid seeing ll as 2 letters
	word = gsub(word, 'rr', "Ŕ") -- temporary conversion to avoid seeing rr as 2 letters
	word = gsub(word, '([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([fjlĽmnrŔsşvwxẍyz])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouûy])', "%1%2i%3%4") --e.g. grft -> grift
	word = gsub(word, '([aeêiîouû])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1%2%3i%4") --e.g. cejnt -> cejnit
	word = gsub(word, '([fjlĽrŔsşwyz])([fjlĽmnrŔsşvwxẍyz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])', "%1i%2%3") --e.g. wrd -> wird
	word = gsub(word, '([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouû])', "%1i%2%3") --e.g. prd -> pird
	word = gsub(word, '([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])$', "%1i%2") --like above, at the end of the word
	word = gsub(word, '([^aeêiîouû])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])([^aeêiîouû])', "%1%2i%3%4") --repeat the latter expression, in case skipped
	word = gsub(word, '([^aeêiîouû])([bcçdghḧkmnpqtvxẍ])([fjlĽmnrŔsşvwxẍyz])$', "%1%2i%3") --repeat the latter expression, in case skipped
	word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([^aeêiîouû])', "%1i%2%3") --e.g. ktk -> kitk
	word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1i%2") --the word is exactly two consonants
	-- NOTE(review): the leading class of the next two rules is [^aeêiîouy]
	-- (has 'y', lacks 'û'), unlike the [^aeêiîouû] used elsewhere; kept
	-- as-is — confirm whether the difference is intentional.
	word = gsub(word, '([^aeêiîouy])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([^aeêiîouû])', "%1%2i%3%4") --same cluster rule, not anchored at the start
	word = gsub(word, '([^aeêiîouy])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1%2i%3") --same cluster rule, at the end of the word
	word = gsub(word, '([^a-zçşêîûĽŔ])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1%2i") --lone final consonant after a non-letter => add i
	word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])$', "%1i") --e.g. j -> ji
	--word = gsub(word, '([^a-zêîûçş0-9\'’])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1%2i%3") --e.g. bra -> bira
	--word = gsub(word, '^([bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvxẍz])', "%1i%2") --e.g. bra -> bira
	--word = gsub(word, '([bcçdfghḧjklmnpqrsştvwxẍz][bcçdfghḧjklĽmnpqrŔsştvwxẍz])([bcçdfghḧjklĽmnpqrŔsştvwxẍz])', "%1i%2") --e.g. aşkra -> aşkira
	--word = gsub(word, 'si([tp][aeêiîouû])', "s%1") -- sp, st cluster
	word = gsub(word, 'Ľ', "ll") --revert the temporary conversion
	word = gsub(word, 'Ŕ', "rr") --revert the temporary conversion

	-- Re-attach the punctuation that was detached earlier.
	return word .. ponct
end
-- Transliterates `text` word by word.
--
-- The text is split on runs of whitespace, every piece is passed through
-- tr_word, and the results are joined with single spaces, so the original
-- whitespace (its kind and amount) is not preserved.
--
-- @param text string: the text to transliterate.
-- @param lang language code; accepted but not used by this function.
-- @param sc script code; accepted but not used by this function.
-- @return string: the transliteration.
function export.tr(text, lang, sc)
	local pieces = mw.text.split(text, '%s+')
	local converted = {}
	for index = 1, #pieces do
		converted[index] = tr_word(pieces[index])
	end
	return table.concat(converted, ' ')
end
return export