Module:okm-translit
Appearance
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Middle Korean language text. It is also used to transliterate Early Modern Korean.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:okm-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
- When the transliteration fails, returns `nil`.
local export = {}
local gsub = mw.ustring.gsub
local chars_Hani = require('Module:scripts').getByCode('Hani'):getCharacters()
local chars_Hang = require('Module:scripts').getByCode('Hang'):getCharacters()
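-- tt_complex maps each compound (cluster) jamo and each compatibility jamo to a
-- sequence of simple jamo. export.tr applies it to every character of the text
-- after NFD normalization. "@" is a placeholder that the rule table (tt) later
-- romanizes as "y".
-- See also (presumably the source of these decompositions):
-- https://github.com/szc126/rime-slg-korean/blob/main/slg_break_jamo.yaml
-- https://github.com/szc126/rime-slg-korean/blob/main/soolegi_yethangeul.custom.yaml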
local tt_complex = {
['ᄢ']='ᄇᄉᄀ',
['ᄣ']='ᄇᄉᄃ',
['ᄤ']='ᄇᄉᄇ',
['ᄥ']='ᄇᄉᄉ',
['ᄦ']='ᄇᄉᄌ',
['ᄳ']='ᄉᄇᄀ',
['ᄴ']='ᄉᄉᄉ',
['ꥥ']='ᄅᄀᄀ',
['ꥧ']='ᄅᄃᄃ',
['ꥪ']='ᄅᄇᄇ',
['ꥲ']='ᄇᄉᄐ',
['ꥵ']='ᄉᄉᄇ',
['ꥸ']='ᄌᄌᄒ',
['ᄁ']='ᄀᄀ',
['ᄄ']='ᄃᄃ',
['ᄈ']='ᄇᄇ',
['ᄊ']='ᄉᄉ',
['ᄍ']='ᄌᄌ',
['ᄓ']='ᄂᄀ',
['ᄔ']='ᄂᄂ',
['ᄕ']='ᄂᄃ',
['ᄖ']='ᄂᄇ',
['ᄗ']='ᄃᄀ',
['ᄘ']='ᄅᄂ',
['ᄙ']='ᄅᄅ',
['ᄚ']='ᄅᄒ',
['ᄜ']='ᄆᄇ',
['ᄞ']='ᄇᄀ',
['ᄟ']='ᄇᄂ',
['ᄠ']='ᄇᄃ',
['ᄡ']='ᄇᄉ',
['ᄧ']='ᄇᄌ',
['ᄨ']='ᄇᄎ',
['ᄩ']='ᄇᄐ',
['ᄪ']='ᄇᄑ',
['ᄬ']='ᄫᄫ',
['ᄭ']='ᄉᄀ',
['ᄮ']='ᄉᄂ',
['ᄯ']='ᄉᄃ',
['ᄰ']='ᄉᄅ',
['ᄱ']='ᄉᄆ',
['ᄲ']='ᄉᄇ',
['ᄵ']='ᄉᄋ',
['ᄶ']='ᄉᄌ',
['ᄷ']='ᄉᄎ',
['ᄸ']='ᄉᄏ',
['ᄹ']='ᄉᄐ',
['ᄺ']='ᄉᄑ',
['ᄻ']='ᄉᄒ',
['ᄽ']='ᄼᄼ',
['ᄿ']='ᄾᄾ',
['ᅁ']='ᄋᄀ',
['ᅂ']='ᄋᄃ',
['ᅃ']='ᄋᄆ',
['ᅄ']='ᄋᄇ',
['ᅅ']='ᄋᄉ',
['ᅆ']='ᄋᅀ',
['ᅇ']='ᄋᄋ',
['ᅈ']='ᄋᄌ',
['ᅉ']='ᄋᄎ',
['ᅊ']='ᄋᄐ',
['ᅋ']='ᄋᄑ',
['ᅍ']='ᄌᄋ',
['ᅏ']='ᅎᅎ',
['ᅑ']='ᅐᅐ',
['ᅒ']='ᄎᄏ',
['ᅓ']='ᄎᄒ',
['ᅖ']='ᄑᄇ',
['ᅘ']='ᄒᄒ',
['ᅚ']='ᄀᄃ',
['ᅛ']='ᄂᄉ',
['ᅜ']='ᄂᄌ',
['ᅝ']='ᄂᄒ',
['ᅞ']='ᄃᄅ',
['ꥠ']='ᄃᄆ',
['ꥡ']='ᄃᄇ',
['ꥢ']='ᄃᄉ',
['ꥣ']='ᄃᄌ',
['ꥤ']='ᄅᄀ',
['ꥦ']='ᄅᄃ',
['ꥨ']='ᄅᄆ',
['ꥩ']='ᄅᄇ',
['ꥫ']='ᄅᄫ',
['ꥬ']='ᄅᄉ',
['ꥭ']='ᄅᄌ',
['ꥮ']='ᄅᄏ',
['ꥯ']='ᄆᄀ',
['ꥰ']='ᄆᄃ',
['ꥱ']='ᄆᄉ',
['ꥳ']='ᄇᄏ',
['ꥴ']='ᄇᄒ',
['ꥶ']='ᄋᄅ',
['ꥷ']='ᄋᄒ',
['ꥹ']='ᄐᄐ',
['ꥺ']='ᄑᄒ',
['ꥻ']='ᄒᄉ',
['ꥼ']='ᅙᅙ',
['ᆅ']='@ᅩᅡ@',
['ᆒ']='@ᅮᅥ@',
['ᅹ']='@ᅡ@ᅩ',
['ᆄ']='@ᅩᅡ',
['ᆆ']='@ᅩ@ᅥ',
['ᆑ']='@ᅮᅥ',
['ᆥ']='@ᅥ@ᅡ',
['ᆐ']='@ᅮᅥ@',
['ힳ']='@ᅩᅡ@',
['ힷ']='@ᅮᅡ@',
['ᆁ']='ᅩ@ᅥ@',
['ᆌ']='ᅮ@ᅥ@',
['ᆧ']='ᅩ@ᅡ@',
['ힽ']='ᅵ@ᅡᅩ',
['ힾ']='ᅵ@ᅡ@',
['ퟀ']='ᅵ@ᅥ@',
['ᅤ']='@ᅡ@',
['ᅨ']='@ᅥ@',
['ᅸ']='@ᅡᅩ',
['ᅽ']='@ᅥᅩ',
['ᅾ']='@ᅥᅮ',
['ᆇ']='@ᅩᅩ',
['ᆈ']='@ᅩ@',
['ᆎ']='@ᅮᅡ',
['ᆏ']='@ᅮᅥ',
['ᆓ']='@ᅮᅮ',
['ᆔ']='@ᅮ@',
['ᆤ']='@ᅡᅮ',
['ힲ']='@ᅩᅡ',
['ힴ']='@ᅩᅥ',
['ힸ']='@ᅮᅩ',
['ᆙ']='ᅵ@ᅡ',
['ᆦ']='ᅩ@ᅡ',
['ힰ']='ᅩ@ᅥ',
['ힵ']='ᅮ@ᅥ',
['ힿ']='ᅵ@ᅥ',
['ퟂ']='ᅵ@ᅩ',
['ퟃ']='ᅵ@ᅮ',
['ᅫ']='ᅩᅡ@',
['ᅰ']='ᅮᅥ@',
['ᆀ']='ᅩᅥ@',
['ᆊ']='ᅮᅡ@',
['ᆋ']='ᅮᅥᅳ',
['ᆗ']='ᅳᅵᅮ',
['ힱ']='ᅩᅩᅵ',
['ힶ']='ᅮᅵ@',
['ힻ']='ᅳᅥ@',
['ퟁ']='ᅵᅩᅵ',
['ퟆ']='ᆞᅥ@',
['ᅣ']='@ᅡ',
['ᅧ']='@ᅥ',
['ᅭ']='@ᅩ',
['ᅲ']='@ᅮ',
['ᅢ']='ᅡ@',
['ᅦ']='ᅥ@',
['ᅪ']='ᅩᅡ',
['ᅬ']='ᅩ@',
['ᅯ']='ᅮᅥ',
['ᅱ']='ᅮ@',
['ᅴ']='ᅳ@',
['ᅶ']='ᅡᅩ',
['ᅷ']='ᅡᅮ',
['ᅺ']='ᅥᅩ',
['ᅻ']='ᅥᅮ',
['ᅼ']='ᅥᅳ',
['ᅿ']='ᅩᅥ',
['ᆂ']='ᅩᅩ',
['ᆃ']='ᅩᅮ',
['ᆉ']='ᅮᅡ',
['ᆍ']='ᅮᅮ',
['ᆕ']='ᅳᅮ',
['ᆖ']='ᅳᅳ',
['ᆘ']='ᅵᅡ',
['ᆚ']='ᅵᅩ',
['ᆛ']='ᅵᅮ',
['ᆜ']='ᅵᅳ',
['ᆝ']='ᅵᆞ',
['ᆟ']='ᆞᅥ',
['ᆠ']='ᆞᅮ',
['ᆡ']='ᆞ@',
['ᆢ']='ᆞᆞ',
['ᆣ']='ᅡᅳ',
['ힹ']='ᅳᅡ',
['ힺ']='ᅳᅥ',
['ힼ']='ᅳᅩ',
['ퟄ']='ᅵ@',
['ퟅ']='ᆞᅡ',
['ᇄ']='ᆨᆺᆨ',
['ᇌ']='ᆯᆨᆺ',
['ᇏ']='ᆯᆮᇂ',
['ᇑ']='ᆯᆷᆨ',
['ᇒ']='ᆯᆷᆺ',
['ᇓ']='ᆯᆸᆺ',
['ᇔ']='ᆯᆸᇂ',
['ᇖ']='ᆯᆺᆺ',
['ᇞ']='ᆷᆺᆺ',
['ᇭ']='ᇰᆨᆨ',
['ퟎ']='ᆮᆮᆸ',
['ퟑ']='ᆮᆺᆨ',
['ퟕ']='ᆯᆨᆨ',
['ퟖ']='ᆯᆨᇂ',
['ퟗ']='ᆯᆯᆿ',
['ퟘ']='ᆯᆷᇂ',
['ퟙ']='ᆯᆸᆮ',
['ퟚ']='ᆯᆸᇁ',
['ퟜ']='ᆯᇹᇂ',
['ퟟ']='ᆷᆫᆫ',
['ퟡ']='ᆷᆸᆺ',
['ퟤ']='ᆸᆯᇁ',
['ퟧ']='ᆸᆺᆮ',
['ퟬ']='ᆺᆺᆨ',
['ퟭ']='ᆺᆺᆮ',
['ퟸ']='ᆽᆸᆸ',
['ᆩ']='ᆨᆨ',
['ᆪ']='ᆨᆺ',
['ᆬ']='ᆫᆽ',
['ᆭ']='ᆫᇂ',
['ᆰ']='ᆯᆨ',
['ᆱ']='ᆯᆷ',
['ᆲ']='ᆯᆸ',
['ᆳ']='ᆯᆺ',
['ᆴ']='ᆯᇀ',
['ᆵ']='ᆯᇁ',
['ᆶ']='ᆯᇂ',
['ᆹ']='ᆸᆺ',
['ᆻ']='ᆺᆺ',
['ᇃ']='ᆨᆯ',
['ᇅ']='ᆫᆨ',
['ᇆ']='ᆫᆮ',
['ᇇ']='ᆫᆺ',
['ᇈ']='ᆫᇫ',
['ᇉ']='ᆫᇀ',
['ᇊ']='ᆮᆨ',
['ᇋ']='ᆮᆯ',
['ᇍ']='ᆯᆫ',
['ᇎ']='ᆯᆮ',
['ᇐ']='ᆯᆯ',
['ᇕ']='ᆯᇦ',
['ᇗ']='ᆯᇫ',
['ᇘ']='ᆯᆿ',
['ᇙ']='ᆯᇹ',
['ᇚ']='ᆷᆨ',
['ᇛ']='ᆷᆯ',
['ᇜ']='ᆷᆸ',
['ᇝ']='ᆷᆺ',
['ᇟ']='ᆷᇫ',
['ᇠ']='ᆷᆾ',
['ᇡ']='ᆷᇂ',
['ᇣ']='ᆸᆯ',
['ᇤ']='ᆸᇁ',
['ᇥ']='ᆸᇂ',
['ᇧ']='ᆺᆨ',
['ᇨ']='ᆺᆮ',
['ᇩ']='ᆺᆯ',
['ᇪ']='ᆺᆸ',
['ᇬ']='ᇰᆨ',
['ᇮ']='ᇰᇰ',
['ᇯ']='ᇰᆿ',
['ᇱ']='ᇰᆺ',
['ᇲ']='ᇰᇫ',
['ᇳ']='ᇁᆸ',
['ᇵ']='ᇂᆫ',
['ᇶ']='ᇂᆯ',
['ᇷ']='ᇂᆷ',
['ᇸ']='ᇂᆸ',
['ᇺ']='ᆨᆫ',
['ᇻ']='ᆨᆸ',
['ᇼ']='ᆨᆾ',
['ᇽ']='ᆨᆿ',
['ᇾ']='ᆨᇂ',
['ᇿ']='ᆫᆫ',
['ퟋ']='ᆫᆯ',
['ퟌ']='ᆫᆾ',
['ퟍ']='ᆮᆮ',
['ퟏ']='ᆮᆸ',
['ퟐ']='ᆮᆺ',
['ퟒ']='ᆮᆽ',
['ퟓ']='ᆮᆾ',
['ퟔ']='ᆮᇀ',
['ퟛ']='ᆯᇰ',
['ퟞ']='ᆷᆫ',
['ퟠ']='ᆷᆷ',
['ퟢ']='ᆷᆽ',
['ퟣ']='ᆸᆮ',
['ퟥ']='ᆸᆷ',
['ퟦ']='ᆸᆸ',
['ퟨ']='ᆸᆽ',
['ퟩ']='ᆸᆾ',
['ퟪ']='ᆺᆷ',
['ퟫ']='ᆺᇦ',
['ퟮ']='ᆺᇫ',
['ퟯ']='ᆺᆽ',
['ퟰ']='ᆺᆾ',
['ퟱ']='ᆺᇀ',
['ퟲ']='ᆺᇂ',
['ퟳ']='ᇫᆸ',
['ퟴ']='ᇫᇦ',
['ퟵ']='ᇰᆷ',
['ퟶ']='ᇰᇂ',
['ퟷ']='ᆽᆸ',
['ퟹ']='ᆽᆽ',
['ퟺ']='ᇁᆺ',
['ퟻ']='ᇁᇀ',
-- compatibility jamo
['ㅩ']='ᄅᄀᄉ',
['ㅫ']='ᄅᄇᄉ',
['ㅴ']='ᄇᄉᄀ',
['ㅵ']='ᄇᄉᄃ',
['ㄲ']='ᄀᄀ',
['ㄸ']='ᄃᄃ',
['ㅃ']='ᄇᄇ',
['ㄳ']='ᄀᄉ',
['ㄵ']='ᄂᄌ',
['ㄶ']='ᄂᄒ',
['ㄺ']='ᄅᄀ',
['ㄻ']='ᄅᄆ',
['ㄼ']='ᄅᄇ',
['ㄽ']='ᄅᄉ',
['ㄾ']='ᄅᄐ',
['ㄿ']='ᄅᄑ',
['ㅀ']='ᄅᄒ',
['ㅄ']='ᄇᄉ',
['ㅆ']='ᄉᄉ',
['ㅉ']='ᄌᄌ',
['ㅥ']='ᄂᄂ',
['ㅦ']='ᄂᄃ',
['ㅧ']='ᄂᄉ',
['ㅨ']='ᄂᅀ',
['ㅪ']='ᄅᄃ',
['ㅬ']='ᄅᅀ',
['ㅭ']='ᄅᅙ',
['ㅮ']='ᄆᄇ',
['ㅯ']='ᄆᄉ',
['ㅰ']='ᄆᅀ',
['ㅲ']='ᄇᄀ',
['ㅳ']='ᄇᄃ',
['ㅶ']='ᄇᄌ',
['ㅷ']='ᄇᄐ',
['ㅹ']='ᄫᄫ',
['ㅺ']='ᄉᄀ',
['ㅻ']='ᄉᄂ',
['ㅼ']='ᄉᄃ',
['ㅽ']='ᄉᄇ',
['ㅾ']='ᄉᄌ',
['ㆀ']='ᄋᄋ',
['ㆂ']='ᅌᄉ',
['ㆃ']='ᅌᅀ',
['ㆅ']='ᄒᄒ',
['ㄱ']='ᄀ',
['ㄴ']='ᄂ',
['ㄷ']='ᄃ',
['ㄹ']='ᄅ',
['ㅁ']='ᄆ',
['ㅂ']='ᄇ',
['ㅅ']='ᄉ',
['ㅇ']='ᄋ',
['ㅈ']='ᄌ',
['ㅊ']='ᄎ',
['ㅋ']='ᄏ',
['ㅌ']='ᄐ',
['ㅍ']='ᄑ',
['ㅎ']='ᄒ',
['ㅤ']='ᅟ', -- filler
['ㅱ']='ᄝ',
['ㅸ']='ᄫ',
['ㅿ']='ᅀ',
['ㆁ']='ᅌ',
['ㆄ']='ᅗ',
['ㆆ']='ᅙ',
['ㆈ']='@ᅩ@ᅡᅵ',
['ㆋ']='@ᅮ@ᅥᅵ',
['ㆇ']='@ᅩ@ᅡ',
['ㆊ']='@ᅮ@ᅥ',
['ㅒ']='@ᅡᅵ',
['ㅖ']='@ᅥᅵ',
['ㅙ']='ᅩᅡᅵ',
['ㅞ']='ᅮᅥᅵ',
['ㆉ']='@ᅩᅵ',
['ㆌ']='@ᅮᅵ',
['ㅐ']='ᅡᅵ',
['ㅑ']='@ᅡ',
['ㅔ']='ᅥᅵ',
['ㅕ']='@ᅥ',
['ㅘ']='ᅩᅡ',
['ㅚ']='ᅩᅵ',
['ㅛ']='@ᅩ',
['ㅝ']='ᅮᅥ',
['ㅟ']='ᅮᅵ',
['ㅠ']='@ᅮ',
['ㅢ']='ᅳᅵ',
['ㅏ']='ᅡ',
['ㅓ']='ᅥ',
['ㅗ']='ᅩ',
['ㅜ']='ᅮ',
['ㅡ']='ᅳ',
['ㅣ']='ᅵ',
['ㆍ']='ᆞ',
}
local tt = [==[
BREAK 1
# remove hanja from (ex.) 사뎐(辭典)
# caps prob. isn't necessary since the "base" text is actually hangeul?
# Hani regex is a reasonable subset of Hani from [[Module:scripts/data]],
# last checked on 20220221
%([一-鿿㐀-䶿𠀀-𰀀-]+%) ×
# to yale
# non-simple
gᄋ Ğ # voiced velar fricative /ɣ/
ᄋᄋ Ő
@ᅩᅡ ywa
@ᅮᅥ ywe
@ᅮ ywu
@ᅩ ywo
ᅩᅡ wa
ᅮᅥ we
ᅵᆞ yo
ᆞᆞ yo
# choseong
ᄀ K
ᄂ N
ᄃ T
ᄅ L
ᄆ M
ᄇ P
ᄉ S
ᄋ Ø
ᄌ C
ᄎ CH
ᄏ KH
ᄐ TH
ᄑ PH
ᄒ H
ᄝ ◆
ᄫ Ƃ
ᅗ ◆
ᄛ ◆
ᅌ Ŋ
ᅀ Z
ᅙ Q
ᄼ ◆
ᅎ ◆
ᅔ ◆
ᄾ ◆
ᅐ ◆
ᅕ ◆
ᅟ × # filler
# jungseong
@ y
ᅡ a
ᅥ e
ᅩ wo
ᅮ wu
ᅳ u
ᅵ i
ᆞ o
ᅠ × # filler
# jongseong
ᆨ k
ᆫ n
ᆮ t
ᆯ l
ᆷ m
ᆸ p
ᆺ s
ᆼ ø
ᆽ c
ᆾ ch
ᆿ kh
ᇀ th
ᇁ ph
ᇂ h
ᇢ W
ᇦ ƃ
ᇴ ◆
ퟝ ◆
ᇰ ŋ
ᇫ z
ᇹ q
# tone
〮 ↑
〯 →
# tone diacritic location
([aiueo]+)([y]?)([↑→↓]) %1%3%2
# hyphens within syllables
BREAK 2
# hyphens within syllables (legacy)
# CV-y
# CVC-C
# CV-C
# C-V
%-%-%-%-(.-[wyaiueo↑→↓]+)(y) %1-%2
%-%-%-(.-[wyaiueo↑→↓]+[^wyaiueo ])([^wyaiueo ]) %1-%2
%-%-%-(.-[wyaiueo↑→↓]+) %1-
%-%-(.-)([wyaiueo]) %1-%2
# 子(ᄌᆞ)ㅣ
(%))(%-?)i %1%2y
Ø ×
BREAK 3
↑ ́
→ ̌
↓ ̀
ğ G
ő OO
Ø NG # capitalized hanja readings
ø ng
ƃ W
Ŋ NG # capitalized hanja readings
ŋ ng
]==]
-- Normalise the rule table so export.tr can read it one rule per line:
-- trim the surrounding whitespace, drop "#" comments together with the
-- whitespace in front of them, and collapse runs of newlines.
do
	local rules = mw.text.trim(tt)
	rules = gsub(rules, '%s*#[^\n]+', '') -- remove comments
	tt = gsub(rules, '\n+', '\n') -- remove empty lines
end
local a, b, c, d = 'ᄀᄂᄃᄅᄆᄇᄉᄋᄌᄎᄏᄐᄑᄒᄝᄫᅗᄛᅌᅀᅙᄼᅎᅔᄾᅐᅕᅟ', '@ᅡᅥᅩᅮᅳᅵᆞᅠ', 'ᆨᆫᆮᆯᆷᆸᆺᆼᆽᆾᆿᇀᇁᇂᇢᇦᇴퟝᇰᇫᇹ', '〮〯'
-- Cache of the parsed rule table; filled on first use by get_rules().
local parsed_rules

-- Parses the rule table `tt` into a list of { pattern, replacement } pairs.
-- Each rule line has the form "<pattern>\t<replacement>". The table is constant
-- after module load, so it is parsed only once instead of on every call.
-- Blank lines are skipped; any other line without a tab separator raises a
-- descriptive error (previously this surfaced as a nil-concatenation error).
local function get_rules()
	if parsed_rules then
		return parsed_rules
	end
	local rules = {}
	for line in mw.text.gsplit(tt, '\n') do
		local pattern, repl = mw.ustring.match(line, '(.+)\t(.+)')
		if pattern then
			rules[#rules + 1] = { pattern, repl }
		elseif mw.ustring.find(line, '%S') then
			error('okm-translit: malformed rule line (missing tab separator): ' .. line)
		end
	end
	parsed_rules = rules
	return rules
end

-- BREAK 1 step: prefixes each hanja reading syllable with a period and, when
-- the text carries tone marks, moves the tone mark in front of the final
-- consonants, inserting "↓" for syllables that have no tone mark.
local function prepare_syllables(text, has_tone_marks)
	-- add period between hanja readings
	text = gsub(text, '([' .. chars_Hani .. '])%((.-)%)', function(hanja, reading)
		local dotted = gsub(reading, ('([%s]+)'):format(a), '.%1')
		return hanja .. '(' .. dotted .. ')'
	end)
	if has_tone_marks then
		-- move the location of tone marks for easier handling and
		-- mark low tone
		local syllable_pat = ('([%s]+)([%s]+)([%s]*)([%s]*)'):format(a, b, c, d)
		text = gsub(text, syllable_pat, function(initial, medial, final, tone)
			if tone == '' then
				tone = '↓'
			end
			return initial .. medial .. tone .. final
		end)
	end
	return text
end

-- One romanized syllable preceded by "-" and one or more ">" shift markers.
local shifted_hyphen_pat =
	'%-(>+)([ĞŐKNTLMPSØCHƂŊZQ]+)(%-?)(y?)(%-?)([waiueo↑→↓]+)(%-?)(y?)(%-?)([kntlmpsøchƃŋzq]*)'

-- gsub callback for shifted_hyphen_pat: drops the leading "-" and the ">"
-- markers and re-inserts the hyphen as many letters into the syllable as
-- there were ">" markers.
local function place_shifted_hyphen(shifts, initials, h1, y1, h2, vowel, h3, y2, h4, finals)
	local len, sub = mw.ustring.len, mw.ustring.sub
	local num_shifts = len(shifts)
	local initial_len = len(initials)
	if initials == 'Ø' then
		-- a lone null initial is counted as zero letters
		initial_len = 0
	end
	local y1_len, y2_len = len(y1), len(y2)
	-- a vowel starting with "w" (wo, wu, wa, we) counts as two letters
	local vowel_len = mw.ustring.find(vowel, '^w') and 2 or 1

	if num_shifts <= initial_len then
		initials = sub(initials, 1, num_shifts) .. '-' .. sub(initials, num_shifts + 1, -1)
	elseif num_shifts <= initial_len + y1_len then
		y1 = y1 .. '-'
	elseif num_shifts <= initial_len + y1_len + vowel_len then
		local offset = num_shifts - (initial_len + y1_len)
		if offset == vowel_len then
			vowel = vowel .. '-'
		else
			vowel = sub(vowel, 1, offset) .. '-' .. sub(vowel, offset + 1, -1)
		end
	elseif num_shifts <= initial_len + y1_len + vowel_len + y2_len then
		y2 = y2 .. '-'
	else
		local offset = num_shifts - (initial_len + y1_len + vowel_len + y2_len)
		finals = sub(finals, 1, offset) .. '-' .. sub(finals, offset + 1, -1)
	end
	return initials .. h1 .. y1 .. h2 .. vowel .. h3 .. y2 .. h4 .. finals
end

-- BREAK 2 step: handle shifted hyphens (e.g. ->>>곬 "kwol-s").
-- Repeats up to 10 times, stopping early once nothing is replaced.
local function resolve_shifted_hyphens(text)
	for _ = 1, 10 do
		-- `count` must be local: the original assigned it as a global.
		local count
		text, count = gsub(text, shifted_hyphen_pat, place_shifted_hyphen)
		if count == 0 then
			break
		end
	end
	return text
end

-- BREAK 3 step: lowercases the text, replaces every "hanja(reading)" group by
-- its upper-cased reading, and strips the periods that prepare_syllables left
-- at the start of the text, after ''' and after whitespace.
local function finalize_hanja_readings(text, has_tone_marks)
	text = mw.ustring.lower(text)
	-- hanja readings
	-- ref. [[Module:Ethi-translit]]
	text = gsub(text, '([' .. chars_Hani .. ']+)%((.-)%)', function(_, reading)
		-- treat final ieung as null if tones are marked (is this a safe assumption?)
		if has_tone_marks then
			reading = gsub(reading, 'ø', '')
		end
		-- convert to uppercase
		return (mw.ustring.upper(reading))
	end)
	-- remove hanja reading leading period
	text = gsub(text, '^%.', '')
	text = gsub(text, "'''%.", "'''")
	text = gsub(text, '(%s)%.', '%1')
	return text
end

-- Transliterates Middle Korean (hangul) text into a Yale-style romanization.
--
-- text: the text to transliterate; <ruby>, <rp> and <rt> tags are stripped.
-- lang: language code (not used by this function).
-- sc:   script code (not used by this function).
--
-- Returns the romanized string, or nil when the text contains no character
-- of the Hang script. If the result still contains "◆" (a jamo without a
-- romanization), the tracking page 'okm-translit/failed romanization' is
-- triggered.
function export.tr(text, lang, sc)
	text = gsub(text, "%<%/?r[pt]%>", "")
	text = gsub(text, "%<%/?ruby%>", "")
	if not mw.ustring.match(text, '[' .. chars_Hang .. ']') then
		return nil
	end

	-- Tone marks are detected on the raw input, before any rewriting.
	local has_tone_marks = mw.ustring.find(text, ('[%s]'):format(d)) ~= nil

	-- Decompose syllable blocks, then split compound and compatibility jamo.
	text = mw.ustring.toNFD(text)
	text = gsub(text, '.', tt_complex)

	-- Apply the rules in order; "BREAK n" lines trigger the procedural steps.
	for _, rule in ipairs(get_rules()) do
		local pattern, repl = rule[1], rule[2]
		local marker = pattern .. repl
		if marker == 'BREAK1' then
			text = prepare_syllables(text, has_tone_marks)
		elseif marker == 'BREAK2' then
			text = resolve_shifted_hyphens(text)
		elseif marker == 'BREAK3' then
			text = finalize_hanja_readings(text, has_tone_marks)
		else
			-- "×" in the rule table stands for the empty replacement
			if repl == '×' then
				repl = ''
			end
			text = gsub(text, pattern, repl)
		end
	end

	-- track failed romanizations
	-- (black diamond instead of U+FFFD to avoid warnings when saving this page)
	if mw.ustring.match(text, '◆') then
		require('Module:debug').track('okm-translit/failed romanization')
	end
	return text
end
return export