انتقل إلى المحتوى

وحدة:Unicode data

من ويكاموس، القاموس الحر

Data modules[عدل]

Unicode name and image modules,
organized by first three digits of codepoint in hexadecimal base
0 1 2 3 4 5 6 7 8 9 A B C D E F
00x names names names names names           names     names   names
images images images images images images images images images images images         images
01x names names names names names   names   names   names names names names names names
images images images images images   images images images   images images images images images images
02x                               names
images images images images images images images images images images images images images images images images
images images images                          
0Ex names                              

-- Table that collects this module's public functions; it is returned at the
-- end of the file.
local export = {}

-- Short names of the Hangul leading consonants, keyed from 0 so that a
-- zero-based jamo index can be used directly.  The empty string is the
-- silent initial, which contributes nothing to the syllable name.
-- Source: http://www.unicode.org/Public/UNIDATA/Jamo.txt
local hangul_leads = {
	[0] = "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", "SS",
	"", "J", "JJ", "C", "K", "T", "P", "H"
}

-- Short names of the 21 Hangul vowels (Jamo.txt), keyed from 0 so that a
-- zero-based jamo index can be used directly.
local hangul_vowels = {
	[0] = "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA",
	"WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI",
	"I"
}

-- Short names of the Hangul trailing consonants (Jamo.txt), keyed from 0.
-- Index 0 is the empty string, used for syllables without a final consonant.
local hangul_trails = {
	[0] = "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", "LB",
	"LS", "LT", "LP", "LH", "M", "B", "BS", "S", "SS", "NG", "J", "C", "K",
	"T", "P", "H"
}

-- Code point ranges whose names are computed rather than looked up in a data
-- module.  Each entry is { first, last, hook } where hook is either a format
-- string applied to the code point or a function taking the code point.
-- The list MUST stay sorted by range start and free of overlaps:
-- export.lookup_name stops scanning at the first entry that starts above the
-- code point it is looking for.
local name_hooks = {
	{     0x00,     0x1f, "<control-%04X>" }, -- C0 control characters
	{     0x7f,     0x9f, "<control-%04X>" }, -- DEL and C1 control characters
	{   0x3400,   0x4db5, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension A
	{   0x4e00,   0x9fcc, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph
	{   0xac00,   0xd7a3, function (codepoint) -- Hangul Syllables
		local m_hangul = require('Module:ko-hangul')
		-- NOTE(review): the indices are presumably zero-based, matching the
		-- [0]-based jamo tables above; confirm against Module:ko-hangul.
		local li, vi, ti = m_hangul.syllableIndex2JamoIndices(
			codepoint - 0xac00
		)
		return ("HANGUL SYLLABLE %s%s%s"):format(
			hangul_leads[li], -- I hate one-based indexing
			hangul_vowels[vi],
			hangul_trails[ti]  -- never mind, I can live with it
		)
	end },
	{   0xd800,   0xdb7f, "<surrogate-%04X>" }, -- Non Private Use High Surrogate
	{   0xdb80,   0xdbff, "<surrogate-%04X>" }, -- Private Use High Surrogate
	{   0xdc00,   0xdfff, "<surrogate-%04X>" }, -- Low Surrogate
	{   0xe000,   0xf8ff, "<private-use-%04X>" }, -- Private Use
	{  0x20000,  0x2a6d6, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension B
	{  0x2a700,  0x2b734, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension C
	{  0x2b740,  0x2b81d, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension D
	{  0x2b820,  0x2ceaf, "CJK UNIFIED IDEOGRAPH-%04X" }, -- CJK Ideograph Extension E
	{  0x2f800,  0x2fa1d, "CJK COMPATIBILITY IDEOGRAPH-%05X" }, -- CJK Compatibility Ideographs Supplement (Supplementary Ideographic Plane)
	{  0xf0000,  0xffffd, "<private-use-%05X>" }, -- Plane 15 Private Use
	{ 0x100000, 0x10fffd, "<private-use-%06X>" }  -- Plane 16 Private Use
}

-- Set of the 66 Unicode noncharacters: U+FDD0..U+FDEF plus the last two code
-- points (xFFFE and xFFFF) of each of the 17 planes.  Built with loops rather
-- than spelled out so that no entry can be mistyped or forgotten.
local noncharacters = {}

for codepoint = 0xfdd0, 0xfdef do
	noncharacters[codepoint] = true
end

for plane = 0, 0x10 do
	local base = plane * 0x10000
	noncharacters[base + 0xfffe] = true
	noncharacters[base + 0xffff] = true
end

-- The name_hooks entry that matched most recently; it is tried first on the
-- next call before the whole list is scanned.
local name_range_cache

-- Returns the name (or angle-bracketed label) of a code point.
--
-- Resolution order:
--   1. noncharacters           -> "<noncharacter-XXXX>"
--   2. the cached name_hooks entry, if the code point lies in its range
--   3. a scan of name_hooks (stops early, relying on the list being sorted)
--   4. the data module 'Module:Unicode data/names/XXX', where XXX is
--      floor(codepoint / 0x1000) written as three hex digits
--   5. "<U-XXXXXX>" when the data module cannot be loaded or has no entry
--
-- codepoint: number.  Returns a string.
function export.lookup_name(codepoint)
	if noncharacters[codepoint] then
		return ("<noncharacter-%.4X>"):format(codepoint)
	end

	if name_range_cache then
		if (codepoint >= name_range_cache[1]) and (codepoint <= name_range_cache[2]) then
			if type(name_range_cache[3]) == "string" then
				return name_range_cache[3]:format(codepoint)
			else
				return name_range_cache[3](codepoint)
			end
		end
	end

	for _, item in ipairs(name_hooks) do
		if (codepoint >= item[1]) and (codepoint <= item[2]) then
			name_range_cache = item
			if type(item[3]) == "string" then
				return item[3]:format(codepoint)
			else
				return item[3](codepoint)
			end
		elseif codepoint < item[1] then
			-- Ranges are sorted ascending, so no later entry can match.
			break
		end
	end

	-- pcall: a missing data page must not abort the whole lookup.
	local success, data = pcall(mw.loadData,
		('Module:Unicode data/names/%03X'):format(
			math.floor(codepoint / 0x1000)
		)
	)

	return (success and data[codepoint]) or ("<U-%06X>"):format(codepoint)
end

-- Template entry point: looks up the name of the code point given as the
-- first argument (of the #invoke call or, failing that, of the calling
-- template) and escapes "<" so that labels such as "<control-0000>" are not
-- interpreted as markup.
--
-- frame: Scribunto frame object.  Returns a single string.
-- Raises an error when the argument is missing or not a number.
function export.template_lookup_name(frame)
	local codepoint = tonumber(frame.args[1] or frame:getParent().args[1])
	if not codepoint then
		error("template_lookup_name: the first argument must be a code point number")
	end

	local name = export.lookup_name(codepoint)
	-- The parentheses drop gsub's second result (the replacement count);
	-- without them the count would be returned too and end up in the output.
	return (name:gsub("<", "&lt;"))
end

-- Names of the named Unicode planes, keyed by plane number
-- (floor(codepoint / 0x10000), see export.lookup_plane).  Plane 14 holds the
-- special-purpose blocks (Tags, 0xE0000) and planes 15 and 16 are the two
-- supplementary private use areas (0xF0000 and 0x100000).
-- NOTE(review): the two private-use names appear to contain a soft hyphen
-- (U+00AD) after "Supplement"; it is kept unchanged here.
local planes = {
	[ 0] = "Basic Multilingual Plane";
	[ 1] = "Supplementary Multilingual Plane";
	[ 2] = "Supplementary Ideographic Plane";
	[14] = "Supplementary Special-purpose Plane";
	[15] = "Supplement­ary Private Use Area-A";
	[16] = "Supplement­ary Private Use Area-B";
}

-- Unicode blocks: block name -> { first code point, last code point }.
-- Some names are localized; the names are runtime keys (see
-- export.get_block_range), so they must not be altered casually.
-- Source: http://www.unicode.org/Public/UNIDATA/Blocks.txt
local blocks = {
	["لاتينية أساسية"                                     ] = { 0x000000, 0x00007f };
	["Latin-1 Supplement"                              ] = { 0x000080, 0x0000ff };
	["Latin Extended-A"                                ] = { 0x000100, 0x00017f };
	["Latin Extended-B"                                ] = { 0x000180, 0x00024f };
	["IPA Extensions"                                  ] = { 0x000250, 0x0002af };
	["Spacing Modifier Letters"                        ] = { 0x0002b0, 0x0002ff };
	["Combining Diacritical Marks"                     ] = { 0x000300, 0x00036f };
	["يونانية وقبطية"                                ] = { 0x000370, 0x0003ff };
	["كيريلية"                                        ] = { 0x000400, 0x0004ff };
	["إضافات كيريلية"                             ] = { 0x000500, 0x00052f };
	["أرمنية"                                        ] = { 0x000530, 0x00058f };
	["عبرية"                                          ] = { 0x000590, 0x0005ff };
	["عربية"                                          ] = { 0x000600, 0x0006ff };
	["سريانية"                                          ] = { 0x000700, 0x00074f };
	["إضافات عربية"                               ] = { 0x000750, 0x00077f };
	["Thaana"                                          ] = { 0x000780, 0x0007bf };
	["نكو"                                             ] = { 0x0007c0, 0x0007ff };
	["Samaritan"                                       ] = { 0x000800, 0x00083f };
	["مندائية"                                         ] = { 0x000840, 0x00085f };
	["Arabic Extended-A"                               ] = { 0x0008a0, 0x0008ff };
	["ديوناكري"                                      ] = { 0x000900, 0x00097f };
	["Bengali"                                         ] = { 0x000980, 0x0009ff };
	["Gurmukhi"                                        ] = { 0x000a00, 0x000a7f };
	["غوجاراتية"                                        ] = { 0x000a80, 0x000aff };
	["أوريا"                                           ] = { 0x000b00, 0x000b7f };
	["تاميلية"                                           ] = { 0x000b80, 0x000bff };
	["Telugu"                                          ] = { 0x000c00, 0x000c7f };
	["كنادية"                                         ] = { 0x000c80, 0x000cff };
	["Malayalam"                                       ] = { 0x000d00, 0x000d7f };
	["سنهالية"                                         ] = { 0x000d80, 0x000dff };
	["Thai"                                            ] = { 0x000e00, 0x000e7f };
	["لاو"                                             ] = { 0x000e80, 0x000eff };
	["Tibetan"                                         ] = { 0x000f00, 0x000fff };
	["Myanmar"                                         ] = { 0x001000, 0x00109f };
	["Georgian"                                        ] = { 0x0010a0, 0x0010ff };
	["هانغل جامو"                                     ] = { 0x001100, 0x0011ff };
	["Ethiopic"                                        ] = { 0x001200, 0x00137f };
	["Ethiopic Supplement"                             ] = { 0x001380, 0x00139f };
	["شيروكية"                                        ] = { 0x0013a0, 0x0013ff };
	["Unified Canadian Aboriginal Syllabics"           ] = { 0x001400, 0x00167f };
	["Ogham"                                           ] = { 0x001680, 0x00169f };
	["Runic"                                           ] = { 0x0016a0, 0x0016ff };
	["Tagalog"                                         ] = { 0x001700, 0x00171f };
	["Hanunoo"                                         ] = { 0x001720, 0x00173f };
	["Buhid"                                           ] = { 0x001740, 0x00175f };
	["Tagbanwa"                                        ] = { 0x001760, 0x00177f };
	["خميرية"                                           ] = { 0x001780, 0x0017ff };
	["منغولية"                                       ] = { 0x001800, 0x0018af };
	["Unified Canadian Aboriginal Syllabics Extended"  ] = { 0x0018b0, 0x0018ff };
	["Limbu"                                           ] = { 0x001900, 0x00194f };
	["Tai Le"                                          ] = { 0x001950, 0x00197f };
	["New Tai Lue"                                     ] = { 0x001980, 0x0019df };
	["Khmer Symbols"                                   ] = { 0x0019e0, 0x0019ff };
	["بجينيزية"                                        ] = { 0x001a00, 0x001a1f };
	["Tai Tham"                                        ] = { 0x001a20, 0x001aaf };
	["Combining Diacritical Marks Extended"            ] = { 0x001ab0, 0x001aff };
	["بالية"                                        ] = { 0x001b00, 0x001b7f };
	["Sundanese"                                       ] = { 0x001b80, 0x001bbf };
	["Batak"                                           ] = { 0x001bc0, 0x001bff };
	["Lepcha"                                          ] = { 0x001c00, 0x001c4f };
	["Ol Chiki"                                        ] = { 0x001c50, 0x001c7f };
	["Sundanese Supplement"                            ] = { 0x001cc0, 0x001ccf };
	["Vedic Extensions"                                ] = { 0x001cd0, 0x001cff };
	["Phonetic Extensions"                             ] = { 0x001d00, 0x001d7f };
	["Phonetic Extensions Supplement"                  ] = { 0x001d80, 0x001dbf };
	["Combining Diacritical Marks Supplement"          ] = { 0x001dc0, 0x001dff };
	["Latin Extended Additional"                       ] = { 0x001e00, 0x001eff };
	["Greek Extended"                                  ] = { 0x001f00, 0x001fff };
	["General Punctuation"                             ] = { 0x002000, 0x00206f };
	["Superscripts and Subscripts"                     ] = { 0x002070, 0x00209f };
	["Currency Symbols"                                ] = { 0x0020a0, 0x0020cf };
	["Combining Diacritical Marks for Symbols"         ] = { 0x0020d0, 0x0020ff };
	["Letterlike Symbols"                              ] = { 0x002100, 0x00214f };
	["أشكال عدد"                                    ] = { 0x002150, 0x00218f };
	["Arrows"                                          ] = { 0x002190, 0x0021ff };
	["Mathematical Operators"                          ] = { 0x002200, 0x0022ff };
	["Miscellaneous Technical"                         ] = { 0x002300, 0x0023ff };
	["Control Pictures"                                ] = { 0x002400, 0x00243f };
	["Optical Character Recognition"                   ] = { 0x002440, 0x00245f };
	["Enclosed Alphanumerics"                          ] = { 0x002460, 0x0024ff };
	["Box Drawing"                                     ] = { 0x002500, 0x00257f };
	["Block Elements"                                  ] = { 0x002580, 0x00259f };
	["Geometric Shapes"                                ] = { 0x0025a0, 0x0025ff };
	["Miscellaneous Symbols"                           ] = { 0x002600, 0x0026ff };
	["Dingbats"                                        ] = { 0x002700, 0x0027bf };
	["Miscellaneous Mathematical Symbols-A"            ] = { 0x0027c0, 0x0027ef };
	["Supplemental Arrows-A"                           ] = { 0x0027f0, 0x0027ff };
	["نمط بريل"                                ] = { 0x002800, 0x0028ff };
	["Supplemental Arrows-B"                           ] = { 0x002900, 0x00297f };
	["Miscellaneous Mathematical Symbols-B"            ] = { 0x002980, 0x0029ff };
	["Supplemental Mathematical Operators"             ] = { 0x002a00, 0x002aff };
	["Miscellaneous Symbols and Arrows"                ] = { 0x002b00, 0x002bff };
	["Glagolitic"                                      ] = { 0x002c00, 0x002c5f };
	["Latin Extended-C"                                ] = { 0x002c60, 0x002c7f };
	["Coptic"                                          ] = { 0x002c80, 0x002cff };
	["إضافات جورجية"                             ] = { 0x002d00, 0x002d2f };
	["Tifinagh"                                        ] = { 0x002d30, 0x002d7f };
	["Ethiopic Extended"                               ] = { 0x002d80, 0x002ddf };
	["Cyrillic Extended-A"                             ] = { 0x002de0, 0x002dff };
	["Supplemental Punctuation"                        ] = { 0x002e00, 0x002e7f };
	["CJK Radicals Supplement"                         ] = { 0x002e80, 0x002eff };
	["Kangxi Radicals"                                 ] = { 0x002f00, 0x002fdf };
	["Ideographic Description Characters"              ] = { 0x002ff0, 0x002fff };
	["CJK Symbols and Punctuation"                     ] = { 0x003000, 0x00303f };
	["Hiragana"                                        ] = { 0x003040, 0x00309f };
	["Katakana"                                        ] = { 0x0030a0, 0x0030ff };
	["Bopomofo"                                        ] = { 0x003100, 0x00312f };
	["Hangul Compatibility Jamo"                       ] = { 0x003130, 0x00318f };
	["Kanbun"                                          ] = { 0x003190, 0x00319f };
	["Bopomofo Extended"                               ] = { 0x0031a0, 0x0031bf };
	["CJK Strokes"                                     ] = { 0x0031c0, 0x0031ef };
	["Katakana Phonetic Extensions"                    ] = { 0x0031f0, 0x0031ff };
	["Enclosed CJK Letters and Months"                 ] = { 0x003200, 0x0032ff };
	["CJK Compatibility"                               ] = { 0x003300, 0x0033ff };
	["CJK Unified Ideographs Extension A"              ] = { 0x003400, 0x004dbf };
	["Yijing Hexagram Symbols"                         ] = { 0x004dc0, 0x004dff };
	["CJK Unified Ideographs"                          ] = { 0x004e00, 0x009fff };
	["Yi Syllables"                                    ] = { 0x00a000, 0x00a48f };
	["Yi Radicals"                                     ] = { 0x00a490, 0x00a4cf };
	["Lisu"                                            ] = { 0x00a4d0, 0x00a4ff };
	["Vai"                                             ] = { 0x00a500, 0x00a63f };
	["Cyrillic Extended-B"                             ] = { 0x00a640, 0x00a69f };
	["Bamum"                                           ] = { 0x00a6a0, 0x00a6ff };
	["Modifier Tone Letters"                           ] = { 0x00a700, 0x00a71f };
	["Latin Extended-D"                                ] = { 0x00a720, 0x00a7ff };
	["Syloti Nagri"                                    ] = { 0x00a800, 0x00a82f };
	["Common Indic Number Forms"                       ] = { 0x00a830, 0x00a83f };
	["Phags-pa"                                        ] = { 0x00a840, 0x00a87f };
	["Saurashtra"                                      ] = { 0x00a880, 0x00a8df };
	["Devanagari Extended"                             ] = { 0x00a8e0, 0x00a8ff };
	["Kayah Li"                                        ] = { 0x00a900, 0x00a92f };
	["Rejang"                                          ] = { 0x00a930, 0x00a95f };
	["Hangul Jamo Extended-A"                          ] = { 0x00a960, 0x00a97f };
	["جاوية"                                        ] = { 0x00a980, 0x00a9df };
	["Myanmar Extended-B"                              ] = { 0x00a9e0, 0x00a9ff };
	["Cham"                                            ] = { 0x00aa00, 0x00aa5f };
	["Myanmar Extended-A"                              ] = { 0x00aa60, 0x00aa7f };
	["Tai Viet"                                        ] = { 0x00aa80, 0x00aadf };
	["Meetei Mayek Extensions"                         ] = { 0x00aae0, 0x00aaff };
	["Ethiopic Extended-A"                             ] = { 0x00ab00, 0x00ab2f };
	["Latin Extended-E"                                ] = { 0x00ab30, 0x00ab6f };
	["Cherokee Supplement"                             ] = { 0x00ab70, 0x00abbf };
	["Meetei Mayek"                                    ] = { 0x00abc0, 0x00abff };
	["Hangul Syllables"                                ] = { 0x00ac00, 0x00d7af };
	["Hangul Jamo Extended-B"                          ] = { 0x00d7b0, 0x00d7ff };
	["High Surrogates"                                 ] = { 0x00d800, 0x00db7f };
	["High Private Use Surrogates"                     ] = { 0x00db80, 0x00dbff };
	["Low Surrogates"                                  ] = { 0x00dc00, 0x00dfff };
	["Private Use Area"                                ] = { 0x00e000, 0x00f8ff };
	["CJK Compatibility Ideographs"                    ] = { 0x00f900, 0x00faff };
	["Alphabetic Presentation Forms"                   ] = { 0x00fb00, 0x00fb4f };
	["Arabic Presentation Forms-A"                     ] = { 0x00fb50, 0x00fdff };
	["Variation Selectors"                             ] = { 0x00fe00, 0x00fe0f };
	["Vertical Forms"                                  ] = { 0x00fe10, 0x00fe1f };
	["Combining Half Marks"                            ] = { 0x00fe20, 0x00fe2f };
	["CJK Compatibility Forms"                         ] = { 0x00fe30, 0x00fe4f };
	["Small Form Variants"                             ] = { 0x00fe50, 0x00fe6f };
	["Arabic Presentation Forms-B"                     ] = { 0x00fe70, 0x00feff };
	["Halfwidth and Fullwidth Forms"                   ] = { 0x00ff00, 0x00ffef };
	["خاصة"                                        ] = { 0x00fff0, 0x00ffff };
	["Linear B Syllabary"                              ] = { 0x010000, 0x01007f };
	["Linear B Ideograms"                              ] = { 0x010080, 0x0100ff };
	["Aegean Numbers"                                  ] = { 0x010100, 0x01013f };
	["Ancient Greek Numbers"                           ] = { 0x010140, 0x01018f };
	["رموز قديمة"                                 ] = { 0x010190, 0x0101cf };
	["Phaistos Disc"                                   ] = { 0x0101d0, 0x0101ff };
	["Lycian"                                          ] = { 0x010280, 0x01029f };
	["Carian"                                          ] = { 0x0102a0, 0x0102df };
	["Coptic Epact Numbers"                            ] = { 0x0102e0, 0x0102ff };
	["Old Italic"                                      ] = { 0x010300, 0x01032f };
	["قوطية"                                          ] = { 0x010330, 0x01034f };
	["Old Permic"                                      ] = { 0x010350, 0x01037f };
	["أوغاريتية"                                        ] = { 0x010380, 0x01039f };
	["فارسية قديمة"                                     ] = { 0x0103a0, 0x0103df };
	["Deseret"                                         ] = { 0x010400, 0x01044f };
	["Shavian"                                         ] = { 0x010450, 0x01047f };
	["Osmanya"                                         ] = { 0x010480, 0x0104af };
	["Elbasan"                                         ] = { 0x010500, 0x01052f };
	["Caucasian Albanian"                              ] = { 0x010530, 0x01056f };
	["Linear A"                                        ] = { 0x010600, 0x01077f };
	["Cypriot Syllabary"                               ] = { 0x010800, 0x01083f };
	["Imperial Aramaic"                                ] = { 0x010840, 0x01085f };
	["Palmyrene"                                       ] = { 0x010860, 0x01087f };
	["Nabataean"                                       ] = { 0x010880, 0x0108af };
	["Hatran"                                          ] = { 0x0108e0, 0x0108ff };
	["فينيقية"                                      ] = { 0x010900, 0x01091f };
	["ليديونية"                                          ] = { 0x010920, 0x01093f };
	["Meroitic Hieroglyphs"                            ] = { 0x010980, 0x01099f };
	["Meroitic Cursive"                                ] = { 0x0109a0, 0x0109ff };
	["Kharoshthi"                                      ] = { 0x010a00, 0x010a5f };
	["عربية جنوبية قديمة"                               ] = { 0x010a60, 0x010a7f };
	["عربية شمالية قديمة"                               ] = { 0x010a80, 0x010a9f };
	["Manichaean"                                      ] = { 0x010ac0, 0x010aff };
	["Avestan"                                         ] = { 0x010b00, 0x010b3f };
	["Inscriptional Parthian"                          ] = { 0x010b40, 0x010b5f };
	["Inscriptional Pahlavi"                           ] = { 0x010b60, 0x010b7f };
	["Psalter Pahlavi"                                 ] = { 0x010b80, 0x010baf };
	["Old Turkic"                                      ] = { 0x010c00, 0x010c4f };
	["مجرية قديمة"                                   ] = { 0x010c80, 0x010cff };
	["Rumi Numeral Symbols"                            ] = { 0x010e60, 0x010e7f };
	["Brahmi"                                          ] = { 0x011000, 0x01107f };
	["Kaithi"                                          ] = { 0x011080, 0x0110cf };
	["Sora Sompeng"                                    ] = { 0x0110d0, 0x0110ff };
	["Chakma"                                          ] = { 0x011100, 0x01114f };
	["Mahajani"                                        ] = { 0x011150, 0x01117f };
	["Sharada"                                         ] = { 0x011180, 0x0111df };
	["Sinhala Archaic Numbers"                         ] = { 0x0111e0, 0x0111ff };
	["Khojki"                                          ] = { 0x011200, 0x01124f };
	["Multani"                                         ] = { 0x011280, 0x0112af };
	["Khudawadi"                                       ] = { 0x0112b0, 0x0112ff };
	["Grantha"                                         ] = { 0x011300, 0x01137f };
	["Tirhuta"                                         ] = { 0x011480, 0x0114df };
	["Siddham"                                         ] = { 0x011580, 0x0115ff };
	["Modi"                                            ] = { 0x011600, 0x01165f };
	["Takri"                                           ] = { 0x011680, 0x0116cf };
	["Ahom"                                            ] = { 0x011700, 0x01173f };
	["Warang Citi"                                     ] = { 0x0118a0, 0x0118ff };
	["Pau Cin Hau"                                     ] = { 0x011ac0, 0x011aff };
	["مسمارية"                                       ] = { 0x012000, 0x0123ff };
	["Cuneiform Numbers and Punctuation"               ] = { 0x012400, 0x01247f };
	["Early Dynastic Cuneiform"                        ] = { 0x012480, 0x01254f };
	["هيرغليفية مصرية"                            ] = { 0x013000, 0x01342f };
	["Anatolian Hieroglyphs"                           ] = { 0x014400, 0x01467f };
	["Bamum Supplement"                                ] = { 0x016800, 0x016a3f };
	["Mro"                                             ] = { 0x016a40, 0x016a6f };
	["Bassa Vah"                                       ] = { 0x016ad0, 0x016aff };
	["Pahawh Hmong"                                    ] = { 0x016b00, 0x016b8f };
	["Miao"                                            ] = { 0x016f00, 0x016f9f };
	["Kana Supplement"                                 ] = { 0x01b000, 0x01b0ff };
	["Duployan"                                        ] = { 0x01bc00, 0x01bc9f };
	["Shorthand Format Controls"                       ] = { 0x01bca0, 0x01bcaf };
	["Byzantine Musical Symbols"                       ] = { 0x01d000, 0x01d0ff };
	["Musical Symbols"                                 ] = { 0x01d100, 0x01d1ff };
	["Ancient Greek Musical Notation"                  ] = { 0x01d200, 0x01d24f };
	["Tai Xuan Jing Symbols"                           ] = { 0x01d300, 0x01d35f };
	["Counting Rod Numerals"                           ] = { 0x01d360, 0x01d37f };
	["Mathematical Alphanumeric Symbols"               ] = { 0x01d400, 0x01d7ff };
	["Sutton SignWriting"                              ] = { 0x01d800, 0x01daaf };
	["Mende Kikakui"                                   ] = { 0x01e800, 0x01e8df };
	["Arabic Mathematical Alphabetic Symbols"          ] = { 0x01ee00, 0x01eeff };
	["Mahjong Tiles"                                   ] = { 0x01f000, 0x01f02f };
	["Domino Tiles"                                    ] = { 0x01f030, 0x01f09f };
	["Playing Cards"                                   ] = { 0x01f0a0, 0x01f0ff };
	["Enclosed Alphanumeric Supplement"                ] = { 0x01f100, 0x01f1ff };
	["Enclosed Ideographic Supplement"                 ] = { 0x01f200, 0x01f2ff };
	["Miscellaneous Symbols and Pictographs"           ] = { 0x01f300, 0x01f5ff };
	["تعبيرات"                                       ] = { 0x01f600, 0x01f64f };
	["Ornamental Dingbats"                             ] = { 0x01f650, 0x01f67f };
	["رموز نقل وخرائط"                       ] = { 0x01f680, 0x01f6ff };
	["رموز خيميائية"                              ] = { 0x01f700, 0x01f77f };
	["Geometric Shapes Extended"                       ] = { 0x01f780, 0x01f7ff };
	["Supplemental Arrows-C"                           ] = { 0x01f800, 0x01f8ff };
	["Supplemental Symbols and Pictographs"            ] = { 0x01f900, 0x01f9ff };
	["CJK Unified Ideographs Extension B"              ] = { 0x020000, 0x02a6df };
	["CJK Unified Ideographs Extension C"              ] = { 0x02a700, 0x02b73f };
	["CJK Unified Ideographs Extension D"              ] = { 0x02b740, 0x02b81f };
	["CJK Unified Ideographs Extension E"              ] = { 0x02b820, 0x02ceaf };
	["CJK Compatibility Ideographs Supplement"         ] = { 0x02f800, 0x02fa1f };
	["Tags"                                            ] = { 0x0e0000, 0x0e007f };
	["Variation Selectors Supplement"                  ] = { 0x0e0100, 0x0e01ef };
	["Supplementary Private Use Area-A"                ] = { 0x0f0000, 0x0fffff };
	["Supplementary Private Use Area-B"                ] = { 0x100000, 0x10ffff };
}

-- Iterates over all blocks in ascending order of their first code point.
--
-- Intended for a generic for loop:
--     for i, name, first, last in export.enum_blocks() do ... end
-- Returns the iterator function, the sorted list and the initial index 0.
function export.enum_blocks()
	local list = {}

	-- pairs() has no defined order, so copy the entries out and sort them.
	for name, range in pairs(blocks) do
		table.insert(list, { name, range[1], range[2] })
	end

	table.sort(list, function (apple, orange)
		return apple[2] < orange[2]
	end)

	return function (list, i)
		local data = list[i + 1]
		if not data then
			return nil
		end
		return i + 1, data[1], data[2], data[3]
	end, list, 0
end

-- Returns the name of the plane containing the code point.  Planes without an
-- entry in the planes table are reported as "Plane N".
function export.lookup_plane(codepoint)
	local i = math.floor(codepoint / 0x10000)
	return planes[i] or ("Plane %u"):format(i)
end

-- Returns the name of the block whose range contains the code point.
-- Falls off the end of the function (returning nothing) when the code point
-- belongs to no block in the blocks table.
function export.lookup_block(codepoint)
	for name, range in pairs(blocks) do
		if (codepoint >= range[1]) and (codepoint <= range[2]) then
			return name
		end
	end
end

-- Returns the first and last code point of the block with the given name,
-- exactly as keyed in the blocks table; returns nothing for an unknown name.
function export.get_block_range(name)
	local range = blocks[name]
	if range then
		return range[1], range[2]
	end
end

-- Checks whether a string is usable as a page name.
--
-- Returns false as soon as a code point is one of the explicitly rejected
-- characters below or is not printable according to export.is_printable.
-- Otherwise returns true only if at least one code point is something other
-- than a space separator (so empty and whitespace-only names are rejected).
function export.is_valid_pagename(pagename)
	local has_nonws = false

	for cp in mw.ustring.gcodepoint(pagename) do
		-- The leading "false" only lets every real condition start with "or".
		if false
		or (cp == 0x0023) -- #
		or (cp == 0x005b) -- [
		or (cp == 0x005d) -- ]
		or (cp == 0x007b) -- {
		or (cp == 0x007c) -- |
		or (cp == 0x007d) -- }
		or (cp == 0x180e) -- MONGOLIAN VOWEL SEPARATOR
		or ((cp >= 0x2000) and (cp <= 0x200a))
		or (cp == 0xfffd) -- REPLACEMENT CHARACTER
		then
			return false
		end

		local printable, result = export.is_printable(cp)
		if not printable then
			return false
		end
		if result ~= "space-separator" then
			has_nonws = true
		end
	end

	return has_nonws
end

-- Returns the array elements of `what` from index `from` (default 1) onwards
-- as multiple values, visiting them with ipairs and copying them into a fresh
-- table first.
-- NOTE(review): presumably exists because tables obtained through
-- mw.loadData cannot be handed to unpack() directly; confirm.
local function manual_unpack(what, from)
	local result = {}
	from = from or 1
	for i, item in ipairs(what) do
		if i >= from then
			table.insert(result, item)
		end
	end
	return unpack(result)
end

-- Builds a memoizing per-code-point property lookup.
--
-- loader:     called once, on first use; must return two tables:
--             singles (code point -> value) and ranges (a list of
--             { first, last, value... } entries).
-- match_func: called as match_func(codepoint, value...) to produce the result.
-- ...:        default value(s) used for code points that fall in a gap before
--             a range.
--
-- Returns a function taking a code point.  Ranges that matched, and gaps that
-- were found, are remembered in a private cache that is consulted before the
-- loaded range list.
-- NOTE(review): the gap detection assumes pairs() visits `ranges` in
-- ascending order of range start; confirm for the data modules in use.
local function memo_lookup(loader, match_func, ...)
	local dots = { ... }
	local cache = {}
	local singles, ranges

	return function (codepoint)
		if not singles then
			singles, ranges = loader()
		end

		if singles[codepoint] then
			return match_func(codepoint, singles[codepoint])
		end

		-- End of the last range passed over; a gap starts right after it.
		local lastlast = -1

		for _, range in pairs(cache) do
			if (range[1] <= codepoint) and (codepoint <= range[2]) then
				return match_func(codepoint, unpack(range, 3))
			end
		end

		for _, range in pairs(ranges) do
			if (range[1] <= codepoint) and (codepoint <= range[2]) then
				table.insert(cache, { manual_unpack(range) })
				return match_func(codepoint, manual_unpack(range, 3))
			elseif codepoint < range[1] then
				-- The code point lies in the gap before this range: cache the
				-- whole gap with the default value(s).
				table.insert(cache, { lastlast + 1, range[1] - 1, unpack(dots) })
				return match_func(codepoint, unpack(dots))
			else
				lastlast = range[2]
			end
		end

		-- Beyond the last range: match_func gets no value at all.
		return match_func(codepoint)
	end
end

-- Fetches the single-code-point table and the range list from the
-- combining data module.
local function load_combining_data()
	local data = mw.loadData('Module:Unicode data/combining')
	return data.single, data.ranges
end

-- Truthy only when a value was found and it is not 0.
local function has_nonzero_class(_, class)
	return class and (class ~= 0)
end

-- export.is_combining(codepoint): truthy when the combining data assigns the
-- code point a non-zero value; 0 is the default for gaps between ranges.
export.is_combining = memo_lookup(load_combining_data, has_nonzero_class, 0)

-- Fetches the single-code-point table and the range list from the
-- control data module.
local function load_control_data()
	local data = mw.loadData('Module:Unicode data/control')
	return data.single, data.ranges
end

-- Passes the stored category through, substituting "assigned" when none
-- was found.
local function control_category(_, category)
	return category or "assigned"
end

-- lookup_control(codepoint): category string from the control data module,
-- with "assigned" as the default.
local lookup_control = memo_lookup(load_control_data, control_category, "assigned")

-- Returns true unless the control data classifies the code point as
-- "unassigned".
function export.is_assigned(codepoint)
	return lookup_control(codepoint) ~= "unassigned"
end

-- Returns two values: whether the code point's category is "assigned" or
-- "space-separator", and the category string itself.
function export.is_printable(codepoint)
	local result = lookup_control(codepoint)
	return (result == "assigned") or (result == "space-separator"), result
end

-- Returns two values: whether the code point's category is
-- "space-separator", and the category string itself.
function export.is_whitespace(codepoint)
	local result = lookup_control(codepoint)
	return (result == "space-separator"), result
end

-- Script detection below is to be used in language-neutral context only
-- (e.g. character lists).

-- Script code -> character-class pattern; filled in lazily by
-- export.get_script on its first cache miss.
local script_pats

-- Script codes that export.get_script never reports.
local script_blacklist = {
	["Latf"] = true;
	["Hans"] = true;
	["Hant"] = true;
	["Kore"] = true;
	["Jpan"] = true;
}

-- Patterns that have already matched some code point, mapped to their script
-- code; tried first on every call.
local script_cache = {}

-- Returns the code of a script whose character pattern matches the code
-- point, or "Zyyy" when none matches.
--
-- The patterns are built on first need from the `characters` field of each
-- entry in Module:scripts/data, skipping the scripts in script_blacklist.
-- NOTE(review): pairs() order is unspecified, so if several patterns match
-- the same character, which script is returned is not determined here.
function export.get_script(codepoint)
	local text = mw.ustring.char(codepoint)

	for pat, sc in pairs(script_cache) do
		if mw.ustring.match(text, pat) then
			return sc
		end
	end

	if not script_pats then
		local m_scripts = mw.loadData("Module:scripts/data")
		script_pats = {}
		for sc, info in pairs(m_scripts) do
			if info.characters and not script_blacklist[sc] then
				script_pats[sc] = "[" .. info.characters .. "]"
			end
		end
	end

	for sc, pat in pairs(script_pats) do
		if mw.ustring.match(text, pat) then
			script_cache[pat] = sc
			return sc
		end
	end

	return "Zyyy"
end

-- Code point -> replacement page title, consulted by export.get_entry_title
-- before anything else.
local unsupported_title = {
	[0x0020] = "Unsupported titles/Space";
	[0x0023] = "Unsupported titles/Number sign";
	[0x002e] = "Unsupported titles/Full stop";
	[0x003a] = "Unsupported titles/Colon";
	[0x003c] = "Unsupported titles/Less than sign";
	[0x003e] = "Unsupported titles/Greater than sign";
	[0x005b] = "Unsupported titles/Left square bracket";
	[0x005d] = "Unsupported titles/Right square bracket";
	[0x005f] = "Unsupported titles/Low line";
	[0x007b] = "Unsupported titles/Left curly bracket";
	[0x007c] = "Unsupported titles/Vertical line";
	[0x007d] = "Unsupported titles/Right curly bracket";
	[0x1680] = "Unsupported titles/Ogham space";
	[0xfffd] = "Unsupported titles/Replacement character";
}

-- Returns the page title for a single code point:
--   * the replacement title from unsupported_title, if there is one;
--   * nil when the code point's category is anything other than "assigned"
--     (this includes space separators);
--   * otherwise the character itself.
function export.get_entry_title(codepoint)
	if unsupported_title[codepoint] then
		return unsupported_title[codepoint]
	end
	if lookup_control(codepoint) ~= "assigned" then
		return nil
	end
	return mw.ustring.char(codepoint)
end

-- Hand the table of public functions to whoever loads this module.
return export