Module:IPA/data/X-SAMPA
Appearance
< Module:IPA | data
- The following documentation is located at Module:IPA/data/X-SAMPA/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox
This module provides the data used to convert X-SAMPA characters to IPA and vice versa. The list includes some non-X-SAMPA shortcuts, which are marked by comments. The meaning of each diacritic is given in a comment: to find the shortcut for a diacritic, search for its meaning.
local U = require("Module:string/char")
local gmatch = mw.ustring.gmatch
-- Maps each X-SAMPA symbol (the key) to a record describing its IPA equivalent:
--   [1]             the IPA character(s);
--   has_descender   set on IPA letters whose glyph extends below the baseline;
--   with_descender  alternative form of a diacritic that is normally placed
--                   below the letter (presumably substituted by the converter
--                   after a has_descender letter -- confirm against the
--                   consuming module);
--   is_diacritic    set on diacritics and other modifiers of a base symbol.
local data = {
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA
	["b\\"] = { "ⱱ" },
	["b_<"] = { "ɓ" },
	["d`"] = { "ɖ", has_descender = true },
	["d_<"] = { "ɗ" },
	-- not in official X-SAMPA; Wikipedia-specific
	["d`_<"] = { "ᶑ", has_descender = true },
	["g"] = { "ɡ", has_descender = true },
	["g_<"] = { "ɠ", has_descender = true },
	["h\\"] = { "ɦ" },
	["j\\"] = { "ʝ", has_descender = true },
	["l`"] = { "ɭ", has_descender = true },
	["l\\"] = { "ɺ" },
	["n`"] = { "ɳ", has_descender = true },
	["p\\"] = { "ɸ", has_descender = true },
	["r`"] = { "ɽ", has_descender = true },
	["r\\"] = { "ɹ" },
	["r\\`"] = { "ɻ", has_descender = true },
	["s`"] = { "ʂ", has_descender = true },
	["s\\"] = { "ɕ" },
	-- the retroflex hook descends below the baseline, as in the other
	-- retroflex letters
	["t`"] = { "ʈ", has_descender = true },
	["v\\"] = { "ʋ" },
	["x\\"] = { "ɧ", has_descender = true },
	["z`"] = { "ʐ", has_descender = true },
	["z\\"] = { "ʑ" },
	["A"] = { "ɑ" },
	["B"] = { "β", has_descender = true },
	["B\\"] = { "ʙ" },
	["C"] = { "ç", has_descender = true },
	["D"] = { "ð" },
	["E"] = { "ɛ" },
	["F"] = { "ɱ", has_descender = true },
	["G"] = { "ɣ", has_descender = true },
	["G\\"] = { "ɢ" },
	["G\\_<"] = { "ʛ" },
	["H"] = { "ɥ", has_descender = true },
	["H\\"] = { "ʜ" },
	["I"] = { "ɪ" },
	["I\\"] = { "ɪ̈" },
	["J"] = { "ɲ", has_descender = true },
	["J\\"] = { "ɟ" },
	["J\\_<"] = { "ʄ", has_descender = true },
	["K"] = { "ɬ" },
	["K\\"] = { "ɮ", has_descender = true },
	["L"] = { "ʎ" },
	["L\\"] = { "ʟ" },
	["M"] = { "ɯ" },
	["M\\"] = { "ɰ", has_descender = true },
	["N"] = { "ŋ", has_descender = true },
	["N\\"] = { "ɴ" },
	["O"] = { "ɔ" },
	["O\\"] = { "ʘ" },
	["P"] = { "ʋ" },
	["Q"] = { "ɒ" },
	["R"] = { "ʁ" },
	["R\\"] = { "ʀ" },
	["S"] = { "ʃ", has_descender = true },
	["T"] = { "θ" },
	["U"] = { "ʊ" },
	["U\\"] = { "ʊ̈" },
	["V"] = { "ʌ" },
	["W"] = { "ʍ" },
	["X"] = { "χ", has_descender = true },
	["X\\"] = { "ħ" },
	["Y"] = { "ʏ" },
	["Z"] = { "ʒ", has_descender = true },
	-- primary stress
	["\""] = { "ˈ" },
	-- secondary stress
	["%"] = { "ˌ" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA
	["%\\"] = { "ᴙ" },
	-- palatalization
	["'"] = { "ʲ", is_diacritic = true },
	-- long
	[":"] = { "ː", is_diacritic = true },
	-- half-long
	[":\\"] = { "ˑ", is_diacritic = true },
	["@"] = { "ə" },
	["@`"] = { "ɚ" },
	["@\\"] = { "ɘ" },
	["{"] = { "æ" },
	["}"] = { "ʉ" },
	["1"] = { "ɨ" },
	["2"] = { "ø" },
	["3"] = { "ɜ" },
	["3`"] = { "ɝ" },
	["3\\"] = { "ɞ" },
	["4"] = { "ɾ" },
	["5"] = { "ɫ" },
	["6"] = { "ɐ" },
	["7"] = { "ɤ" },
	["8"] = { "ɵ" },
	["9"] = { "œ" },
	["&"] = { "ɶ" },
	["?"] = { "ʔ" },
	["?\\"] = { "ʕ" },
	["<\\"] = { "ʢ" },
	[">\\"] = { "ʡ" },
	-- upstep
	["^"] = { "ꜛ" },
	-- downstep
	["!"] = { "ꜜ" },
	-- not in official X-SAMPA
	["!!"] = { "‼" },
	["!\\"] = { "ǃ" },
	["|\\"] = { "ǀ", has_descender = true },
	["||"] = { "‖", has_descender = true },
	["|\\|\\"] = { "ǁ", has_descender = true },
	["=\\"] = { "ǂ", has_descender = true },
	-- linking mark, liaison
	["-\\"] = { "‿", is_diacritic = true },
	-- coarticulated; not in official X-SAMPA
	["__"] = { U(0x361) },
	-- fortis, strong articulation; not in official X-SAMPA
	["_:"] = { U(0x348) },
	-- centralized
	["_\""] = { U(0x308), is_diacritic = true },
	-- advanced
	["_+"] = { U(0x31F), with_descender = "˖", is_diacritic = true },
	-- retracted
	["_-"] = { U(0x320), with_descender = "˗", is_diacritic = true },
	-- rising tone
	["_/"] = { U(0x30C), is_diacritic = true },
	-- voiceless
	["_0"] = { U(0x325), with_descender = U(0x30A), is_diacritic = true },
	-- syllabic
	["="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- syllabic
	["_="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- strident: not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA
	["_%\\"] = { U(0x1DFD) },
	-- ejective
	["_>"] = { "ʼ", is_diacritic = true },
	-- pharyngealized
	["_?\\"] = { "ˤ", is_diacritic = true },
	-- falling tone
	["_\\"] = { U(0x302), is_diacritic = true },
	-- non-syllabic
	["_^"] = { U(0x32F), is_diacritic = true },
	-- no audible release
	["_}"] = { U(0x31A), is_diacritic = true },
	-- r-coloring (colouring), rhotacization
	["`"] = { U(0x2DE), is_diacritic = true },
	-- nasalization
	["~"] = { U(0x303), is_diacritic = true },
	-- advanced tongue root
	["_A"] = { U(0x318), is_diacritic = true },
	-- apical
	["_a"] = { U(0x33A), is_diacritic = true },
	-- extra-low tone
	["_B"] = { U(0x30F), is_diacritic = true },
	-- low rising tone
	["_B_L"] = { U(0x1DC5), is_diacritic = true },
	-- less rounded
	["_c"] = { U(0x31C), is_diacritic = true },
	-- dental
	["_d"] = { U(0x32A), is_diacritic = true },
	-- velarized or pharyngealized (dark)
	["_e"] = { U(0x334), is_diacritic = true },
	-- global fall (downstep is "!")
	["<F>"] = { "↘" },
	-- falling tone
	["_F"] = { U(0x302), is_diacritic = true },
	-- velarized
	["_G"] = { "ˠ", is_diacritic = true },
	-- high tone
	["_H"] = { U(0x301), is_diacritic = true },
	-- high rising tone
	["_H_T"] = { U(0x1DC4), is_diacritic = true },
	-- aspiration
	["_h"] = { "ʰ", is_diacritic = true },
	-- voiced aspiration (= breathy voice)
	["_h\\"] = { "ʱ", is_diacritic = true },
	-- palatalization
	["_j"] = { "ʲ", is_diacritic = true },
	-- creaky voice, laryngealization, vocal fry
	["_k"] = { U(0x330), is_diacritic = true },
	-- low tone
	["_L"] = { U(0x300), is_diacritic = true },
	-- lateral release
	["_l"] = { "ˡ", is_diacritic = true },
	-- mid tone
	["_M"] = { U(0x304), is_diacritic = true },
	-- laminal
	["_m"] = { U(0x33B), is_diacritic = true },
	-- linguolabial
	["_N"] = { U(0x33C), is_diacritic = true },
	-- nasal release
	["_n"] = { "ⁿ", is_diacritic = true },
	-- more rounded
	["_O"] = { U(0x339), is_diacritic = true },
	-- lowered
	["_o"] = { U(0x31E), with_descender = "˕", is_diacritic = true },
	-- retracted tongue root
	["_q"] = { U(0x319), is_diacritic = true },
	-- global rise
	["<R>"] = { "↗" },
	-- rising tone
	["_R"] = { U(0x30C), is_diacritic = true },
	-- rising falling tone
	["_R_F"] = { U(0x1DC8), is_diacritic = true },
	-- raised; the spacing up tack mirrors the down tack used for "_o"
	["_r"] = { U(0x31D), with_descender = "˔", is_diacritic = true },
	-- extra-high tone
	["_T"] = { U(0x30B), is_diacritic = true },
	-- breathy voice, murmured voice, murmur, whispery voice
	["_t"] = { U(0x324), is_diacritic = true },
	-- voiced
	["_v"] = { U(0x32C), is_diacritic = true },
	-- labialized
	["_w"] = { "ʷ", is_diacritic = true },
	-- extra-short
	["_X"] = { U(0x306), is_diacritic = true },
	-- mid-centralized
	["_x"] = { U(0x33D), is_diacritic = true },
	-- tone letters, from extra-high down to extra-low
	["__T"] = { "˥" },
	["__H"] = { "˦" },
	["__M"] = { "˧" },
	["__L"] = { "˨" },
	["__B"] = { "˩" },
	-- not X-SAMPA; for convenience
	["0"] = { "◌" }, -- dotted circle
}
-- Lowercase letters that are written with the same character in X-SAMPA and
-- in IPA and that get no descender flag; each one is registered as mapping to
-- itself. "g" is not listed because the table maps it to "ɡ", and "j", "p",
-- "q", "y" are registered separately with has_descender. "b", "d", "f" and
-- "i" are included so that every plain letter has an entry.
local identical = "abcdefhiklmnorstuvwxz"
for char in gmatch(identical, ".") do
	data[char] = { char }
end
-- Letters shared unchanged between X-SAMPA and IPA that are flagged with
-- has_descender; each one maps to itself.
local descender_letters = "jpqy"
for index = 1, #descender_letters do
	local letter = descender_letters:sub(index, index)
	data[letter] = { letter, has_descender = true }
end

-- The finished symbol table is the module's export.
return data