-- Modul:Language/data
-- Shorthand: builds a string from one or more Unicode code points.
local U = mw.ustring.char
-- Combining diacritics (Unicode "Combining Diacritical Marks" block).
-- They are concatenated into character-class keys in the "replacements"
-- tables of the language data below.
local grave = U(0x300)
local acute = U(0x301)
-- Needed by the "nci" replacements; if this local is missing, the key
-- expression there concatenates a nil global and the module fails to load.
local circumflex = U(0x302)
local double_acute = U(0x30B)
local tilde = U(0x303)
local macron = U(0x304)
local dgrave = U(0x30F)
local invbreve = U(0x311)
--[[ Name is the "canonical name" used on Wiktionary. Article is the Wikipedia article. Script is the ISO 15924 code. ]]
--[[
Language data returned by this module.

data.languages maps a language code to an entry table. Fields that occur
in the entries below:
  name            canonical name of the language
  article         title of the Wikipedia article, as a plain string
  direction       text direction; only "rtl" (Arabic) is set
  type            "reconstructed" on the proto-language entries
  Wikipedia_code  alternative language code, e.g. "gem-x-proto"
  Wikipedia_name  alternative language name
  replacements    map from a pattern (single character, character class,
                  or capture pattern) to its replacement text; presumably
                  applied by the consuming module via a gsub-style call
                  (consumer not visible here -- TODO confirm)

NOTE(review): several entries have a bare string right after the "name"
or "article" value, e.g. "Inggris Kuno". By Lua table-constructor rules
such a string is stored at index [1] of the entry, not under a named key.
These look like Indonesian names; confirm whether any consumer reads [1].

data.redirects maps an alias code to a key of data.languages.
]]
local data = {
	["languages"] = {
		["ang"] = {
			["name"] = "Old English", "Inggris Kuno",
			-- A plain string, like the "article" of every other entry.
			["article"] = "Bahasa Inggris Kuno",
			-- ["scripts"] = {"Latn"},
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				["[ĀÁ]"] = "A",
				["[āá]"] = "a",
				["[ǢǼ]"] = "Æ",
				["[ǣǽ]"] = "æ",
				["Ċ"] = "C",
				["ċ"] = "c",
				["[ĒÉ]"] = "E",
				["[ēé]"] = "e",
				["Ġ"] = "G",
				["ġ"] = "g",
				["[ĪÍ]"] = "I",
				["[īí]"] = "i",
				["[ŌÓ]"] = "O",
				["[ōó]"] = "o",
				["[ŪÚ]"] = "U",
				["[ūú]"] = "u",
				["[ȲÝ]"] = "Y",
				["[ȳý]"] = "y",
			},
		},
		["ar"] = {
			["name"] = "Arabic", "Arab",
			["article"] = "Bahasa Arab",
			-- ["scripts"] = { "Arab" },
			--[[ ālif with wasla is replaced by ālif;
			taṭwīl, fatḥatan, ḍammatan, kasratan,
			fatḥa, ḍamma, kasra,
			shadda, sukūn, and superscript (dagger) ālif are removed. ]]
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = {
				[U(0x0671)] = U(0x0627),
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["be"] = {
			["article"] = "Belarusian language", "Bahasa Belarusia",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", },
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bahasa Bengali",
			-- ["scripts"] = { "Beng" },
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Bahasa Slavonik Gerejawi Kuno",
			-- ["scripts"] = { "Cyrs" },
		},
		["de"] = {
			["name"] = "German", "Jerman",
			["article"] = "Bahasa Jerman",
			-- ["scripts"] = { "Latn" },
			--[[
			["replacements"] = {
			["ae"] = "ä",
			["oe"] = "ö",
			["ue"] = "ü",
			["A[Ee]"] = "Ä",
			["O[Ee]"] = "Ö",
			["U[Ee]"] = "Ü",
			},
			]]
		},
		["en"] = {
			["name"] = "English", "Inggris",
			["article"] = "Bahasa Inggris",
			-- ["scripts"] = { "Latn" },
		},
		["es"] = {
			["name"] = "Spanish", "Spanyol",
			["article"] = "Bahasa Spanyol",
			-- ["scripts"] = { "Latn" },
		},
		["fr"] = {
			["name"] = "French", "Prancis",
			["article"] = "Bahasa Prancis",
			-- ["scripts"] = { "Latn" },
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
			-- ["scripts"] = { "Latn" },
		},
		["gem-pro"] = {
			["name"] = "Proto-Germanic", "Proto-Jerman",
			["article"] = "Bahasa Proto-Jerman",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["grc"] = {
			["name"] = "Ancient Greek", "Yunani Kuno",
			["article"] = "Bahasa Yunani Kuno",
			-- ["scripts"] = { "Grek" },
			["replacements"] = {
				-- Vowels with macrons or breves are replaced with plain letters.
				["[ᾱᾰ]"] = "α",
				["[ᾹᾸ]"] = "Α",
				["[ῑῐ]"] = "ι",
				["[ῙῘ]"] = "Ι",
				["[ῡῠ]"] = "υ",
				["[ῩῨ]"] = "Υ",
				-- Symbol variants of Greek letters are mapped to the regular letters.
				["ϐ"] = "β",
				["ϵ"] = "ε",
				["ϑ"] = "θ",
				["ϰ"] = "κ",
				["ϱ"] = "ρ",
				["ϲ"] = "σ",
				["ϕ"] = "φ",
			},
		},
		["got"] = {
			["name"] = "Gothic", "Gotik",
			["article"] = "Bahasa Gotik",
			-- ["scripts"] = { "Goth" },
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["grk-pro"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Bahasa Proto-Yunani",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Bahasa Hindi",
			-- ["scripts"] = { "Deva" },
		},
		["ine-pro"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Bahasa Proto-Indo-Eropa",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ja"] = {
			["name"] = "Japanese", "Jepang",
			["article"] = "Bahasa Jepang",
			-- ["scripts"] = { "Jpan" },
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Bahasa Latin",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
				["[ĀĂ]"] = "A",
				["[āă]"] = "a",
				["[ĒĔ]"] = "E",
				["[ēĕë]"] = "e",
				["[ĪĬÏ]"] = "I",
				["[īĭï]"] = "i",
				["[ŌŎ]"] = "O",
				["[ōŏ]"] = "o",
				["[ŪŬÜ]"] = "U",
				["[ūŭü]"] = "u",
				["Ȳ"] = "Y",
				["ȳ"] = "y"
			},
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
			-- ["scripts"] = { "" },
		},
		["nci"] = {
			["name"] = "Nahuatl",
			["article"] = "Nahuatl",
			-- ["scripts"] = {"Latn"},
			-- Remove macrons, acutes, circumflexes and graves
			["replacements"] = {
				["[ĀÁÀÂ]"] = "A",
				["[āáàâ]"] = "a",
				["[ĒÉÈÊ]"] = "E",
				["[ēéèê]"] = "e",
				["[ĪÍÌÎ]"] = "I",
				["[īíìî]"] = "i",
				["[ŌÓÒÔ]"] = "O",
				["[ōóòô]"] = "o",
				["[ŪÚÙÛ]"] = "U",
				["[ūúùû]"] = "u",
				-- Standalone combining marks are removed as well.
				["[" .. grave .. acute .. macron .. circumflex .. "]"] = "",
				-- Remove saltillo, see [[Saltillo (linguistics)]]
				["[Ꞌꞌʻʼ'ʔ]"] = "",
			},
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Bahasa Slavik Timur Kuno",
			-- ["scripts"] = { "Cyrs" },
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["pt"] = {
			["name"] = "Portuguese", "Portugis",
			["article"] = "Bahasa Portugis",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Bahasa Punjabi",
			-- ["scripts"] = { "Guru", "Arab", },
		},
		["ru"] = {
			["name"] = "Russian", "Rusia",
			["article"] = "Bahasa Rusia",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", },
		},
		["se"] = {
			["replacements"] = {
				-- A listed consonant, an apostrophe, then the same consonant
				-- again (%1 back-reference) becomes the doubled consonant.
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sh"] = {
			["article"] = "Bahasa Serbo-Kroasia",
			-- ["scripts"] = { "Latn", "Cyrl" },
			["replacements"] = {
				["[ȀÀȂÁĀÃ]"] = "A",
				["[ȁàȃáāã]"] = "a",
				["[ȄÈȆÉĒẼ]"] = "E",
				["[ȅèȇéēẽ]"] = "e",
				["[ȈÌȊÍĪĨ]"] = "I",
				["[ȉìȋíīĩ]"] = "i",
				["[ȌÒȎÓŌÕ]"] = "O",
				["[ȍòȏóōõ]"] = "o",
				["[ȐȒŔ]"] = "R",
				["[ȑȓŕ]"] = "r",
				["[ȔÙȖÚŪŨ]"] = "U",
				["[ȕùȗúūũ]"] = "u",
				["Ѐ"] = "Е",
				["ѐ"] = "е",
				["[ӢЍ]"] = "И",
				["[ӣѝ]"] = "и",
				["[Ӯ]"] = "У",
				["[ӯ]"] = "у"
			},
		},
		["sla-pro"] = {
			["name"] = "Bahasa Proto-Slavik", -- also Common Slavic
			["type"] = "reconstructed",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				-- Standalone combining marks are removed as well.
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
			},
		},
		["uk"] = {
			["article"] = "Bahasa Ukraina",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", }
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Bahasa Urdu",
			-- ["scripts"] = { "Arab" },
		},
		["zh"] = {
			["name"] = "Chinese", "Tionghoa",
			["article"] = "Bahasa Tionghoa",
			-- ["scripts"] = { "Hani" },
		},
		["xcl"] = {
			["name"] = "Old Armenian", "Armenia Kuno",
			["article"] = "Bahasa Armenia Klasik",
			-- ["scripts"] = { "Armn" },
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xvn"] = {
			["name"] = "Vandalic", "Vandal",
			["article"] = "Bahasa Vandal",
			-- ["scripts"] = { "Latn" },
		},
		-- Blank templates for adding new entries (without / with replacements).
		--[[
		[""] = {
		["name"] = "",
		["article"] = "",
		-- ["scripts"] = { "" },
		},
		[""] = {
		["name"] = "",
		["article"] = "",
		-- ["scripts"] = { "" },
		["replacements"] = {
		},
		},
		]]
	},
	-- Alias code -> key in "languages".
	["redirects"] = {
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["gem-x-proto"] = "gem-pro",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["ine-x-proto"] = "ine-pro",
	},
}
return data