asd
This commit is contained in:
@ -0,0 +1,802 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# NOTE: This file was auto-generated with MetaTools/buildUCD.py.
|
||||
# Source: https://unicode.org/Public/UNIDATA/Blocks.txt
|
||||
# License: http://unicode.org/copyright.html#License
|
||||
#
|
||||
# Blocks-16.0.0.txt
|
||||
# Date: 2024-02-02
|
||||
# © 2024 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see https://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# Format:
|
||||
# Start Code..End Code; Block Name
|
||||
|
||||
|
||||
RANGES = [
|
||||
0x0000, # .. 0x007F ; Basic Latin
|
||||
0x0080, # .. 0x00FF ; Latin-1 Supplement
|
||||
0x0100, # .. 0x017F ; Latin Extended-A
|
||||
0x0180, # .. 0x024F ; Latin Extended-B
|
||||
0x0250, # .. 0x02AF ; IPA Extensions
|
||||
0x02B0, # .. 0x02FF ; Spacing Modifier Letters
|
||||
0x0300, # .. 0x036F ; Combining Diacritical Marks
|
||||
0x0370, # .. 0x03FF ; Greek and Coptic
|
||||
0x0400, # .. 0x04FF ; Cyrillic
|
||||
0x0500, # .. 0x052F ; Cyrillic Supplement
|
||||
0x0530, # .. 0x058F ; Armenian
|
||||
0x0590, # .. 0x05FF ; Hebrew
|
||||
0x0600, # .. 0x06FF ; Arabic
|
||||
0x0700, # .. 0x074F ; Syriac
|
||||
0x0750, # .. 0x077F ; Arabic Supplement
|
||||
0x0780, # .. 0x07BF ; Thaana
|
||||
0x07C0, # .. 0x07FF ; NKo
|
||||
0x0800, # .. 0x083F ; Samaritan
|
||||
0x0840, # .. 0x085F ; Mandaic
|
||||
0x0860, # .. 0x086F ; Syriac Supplement
|
||||
0x0870, # .. 0x089F ; Arabic Extended-B
|
||||
0x08A0, # .. 0x08FF ; Arabic Extended-A
|
||||
0x0900, # .. 0x097F ; Devanagari
|
||||
0x0980, # .. 0x09FF ; Bengali
|
||||
0x0A00, # .. 0x0A7F ; Gurmukhi
|
||||
0x0A80, # .. 0x0AFF ; Gujarati
|
||||
0x0B00, # .. 0x0B7F ; Oriya
|
||||
0x0B80, # .. 0x0BFF ; Tamil
|
||||
0x0C00, # .. 0x0C7F ; Telugu
|
||||
0x0C80, # .. 0x0CFF ; Kannada
|
||||
0x0D00, # .. 0x0D7F ; Malayalam
|
||||
0x0D80, # .. 0x0DFF ; Sinhala
|
||||
0x0E00, # .. 0x0E7F ; Thai
|
||||
0x0E80, # .. 0x0EFF ; Lao
|
||||
0x0F00, # .. 0x0FFF ; Tibetan
|
||||
0x1000, # .. 0x109F ; Myanmar
|
||||
0x10A0, # .. 0x10FF ; Georgian
|
||||
0x1100, # .. 0x11FF ; Hangul Jamo
|
||||
0x1200, # .. 0x137F ; Ethiopic
|
||||
0x1380, # .. 0x139F ; Ethiopic Supplement
|
||||
0x13A0, # .. 0x13FF ; Cherokee
|
||||
0x1400, # .. 0x167F ; Unified Canadian Aboriginal Syllabics
|
||||
0x1680, # .. 0x169F ; Ogham
|
||||
0x16A0, # .. 0x16FF ; Runic
|
||||
0x1700, # .. 0x171F ; Tagalog
|
||||
0x1720, # .. 0x173F ; Hanunoo
|
||||
0x1740, # .. 0x175F ; Buhid
|
||||
0x1760, # .. 0x177F ; Tagbanwa
|
||||
0x1780, # .. 0x17FF ; Khmer
|
||||
0x1800, # .. 0x18AF ; Mongolian
|
||||
0x18B0, # .. 0x18FF ; Unified Canadian Aboriginal Syllabics Extended
|
||||
0x1900, # .. 0x194F ; Limbu
|
||||
0x1950, # .. 0x197F ; Tai Le
|
||||
0x1980, # .. 0x19DF ; New Tai Lue
|
||||
0x19E0, # .. 0x19FF ; Khmer Symbols
|
||||
0x1A00, # .. 0x1A1F ; Buginese
|
||||
0x1A20, # .. 0x1AAF ; Tai Tham
|
||||
0x1AB0, # .. 0x1AFF ; Combining Diacritical Marks Extended
|
||||
0x1B00, # .. 0x1B7F ; Balinese
|
||||
0x1B80, # .. 0x1BBF ; Sundanese
|
||||
0x1BC0, # .. 0x1BFF ; Batak
|
||||
0x1C00, # .. 0x1C4F ; Lepcha
|
||||
0x1C50, # .. 0x1C7F ; Ol Chiki
|
||||
0x1C80, # .. 0x1C8F ; Cyrillic Extended-C
|
||||
0x1C90, # .. 0x1CBF ; Georgian Extended
|
||||
0x1CC0, # .. 0x1CCF ; Sundanese Supplement
|
||||
0x1CD0, # .. 0x1CFF ; Vedic Extensions
|
||||
0x1D00, # .. 0x1D7F ; Phonetic Extensions
|
||||
0x1D80, # .. 0x1DBF ; Phonetic Extensions Supplement
|
||||
0x1DC0, # .. 0x1DFF ; Combining Diacritical Marks Supplement
|
||||
0x1E00, # .. 0x1EFF ; Latin Extended Additional
|
||||
0x1F00, # .. 0x1FFF ; Greek Extended
|
||||
0x2000, # .. 0x206F ; General Punctuation
|
||||
0x2070, # .. 0x209F ; Superscripts and Subscripts
|
||||
0x20A0, # .. 0x20CF ; Currency Symbols
|
||||
0x20D0, # .. 0x20FF ; Combining Diacritical Marks for Symbols
|
||||
0x2100, # .. 0x214F ; Letterlike Symbols
|
||||
0x2150, # .. 0x218F ; Number Forms
|
||||
0x2190, # .. 0x21FF ; Arrows
|
||||
0x2200, # .. 0x22FF ; Mathematical Operators
|
||||
0x2300, # .. 0x23FF ; Miscellaneous Technical
|
||||
0x2400, # .. 0x243F ; Control Pictures
|
||||
0x2440, # .. 0x245F ; Optical Character Recognition
|
||||
0x2460, # .. 0x24FF ; Enclosed Alphanumerics
|
||||
0x2500, # .. 0x257F ; Box Drawing
|
||||
0x2580, # .. 0x259F ; Block Elements
|
||||
0x25A0, # .. 0x25FF ; Geometric Shapes
|
||||
0x2600, # .. 0x26FF ; Miscellaneous Symbols
|
||||
0x2700, # .. 0x27BF ; Dingbats
|
||||
0x27C0, # .. 0x27EF ; Miscellaneous Mathematical Symbols-A
|
||||
0x27F0, # .. 0x27FF ; Supplemental Arrows-A
|
||||
0x2800, # .. 0x28FF ; Braille Patterns
|
||||
0x2900, # .. 0x297F ; Supplemental Arrows-B
|
||||
0x2980, # .. 0x29FF ; Miscellaneous Mathematical Symbols-B
|
||||
0x2A00, # .. 0x2AFF ; Supplemental Mathematical Operators
|
||||
0x2B00, # .. 0x2BFF ; Miscellaneous Symbols and Arrows
|
||||
0x2C00, # .. 0x2C5F ; Glagolitic
|
||||
0x2C60, # .. 0x2C7F ; Latin Extended-C
|
||||
0x2C80, # .. 0x2CFF ; Coptic
|
||||
0x2D00, # .. 0x2D2F ; Georgian Supplement
|
||||
0x2D30, # .. 0x2D7F ; Tifinagh
|
||||
0x2D80, # .. 0x2DDF ; Ethiopic Extended
|
||||
0x2DE0, # .. 0x2DFF ; Cyrillic Extended-A
|
||||
0x2E00, # .. 0x2E7F ; Supplemental Punctuation
|
||||
0x2E80, # .. 0x2EFF ; CJK Radicals Supplement
|
||||
0x2F00, # .. 0x2FDF ; Kangxi Radicals
|
||||
0x2FE0, # .. 0x2FEF ; No_Block
|
||||
0x2FF0, # .. 0x2FFF ; Ideographic Description Characters
|
||||
0x3000, # .. 0x303F ; CJK Symbols and Punctuation
|
||||
0x3040, # .. 0x309F ; Hiragana
|
||||
0x30A0, # .. 0x30FF ; Katakana
|
||||
0x3100, # .. 0x312F ; Bopomofo
|
||||
0x3130, # .. 0x318F ; Hangul Compatibility Jamo
|
||||
0x3190, # .. 0x319F ; Kanbun
|
||||
0x31A0, # .. 0x31BF ; Bopomofo Extended
|
||||
0x31C0, # .. 0x31EF ; CJK Strokes
|
||||
0x31F0, # .. 0x31FF ; Katakana Phonetic Extensions
|
||||
0x3200, # .. 0x32FF ; Enclosed CJK Letters and Months
|
||||
0x3300, # .. 0x33FF ; CJK Compatibility
|
||||
0x3400, # .. 0x4DBF ; CJK Unified Ideographs Extension A
|
||||
0x4DC0, # .. 0x4DFF ; Yijing Hexagram Symbols
|
||||
0x4E00, # .. 0x9FFF ; CJK Unified Ideographs
|
||||
0xA000, # .. 0xA48F ; Yi Syllables
|
||||
0xA490, # .. 0xA4CF ; Yi Radicals
|
||||
0xA4D0, # .. 0xA4FF ; Lisu
|
||||
0xA500, # .. 0xA63F ; Vai
|
||||
0xA640, # .. 0xA69F ; Cyrillic Extended-B
|
||||
0xA6A0, # .. 0xA6FF ; Bamum
|
||||
0xA700, # .. 0xA71F ; Modifier Tone Letters
|
||||
0xA720, # .. 0xA7FF ; Latin Extended-D
|
||||
0xA800, # .. 0xA82F ; Syloti Nagri
|
||||
0xA830, # .. 0xA83F ; Common Indic Number Forms
|
||||
0xA840, # .. 0xA87F ; Phags-pa
|
||||
0xA880, # .. 0xA8DF ; Saurashtra
|
||||
0xA8E0, # .. 0xA8FF ; Devanagari Extended
|
||||
0xA900, # .. 0xA92F ; Kayah Li
|
||||
0xA930, # .. 0xA95F ; Rejang
|
||||
0xA960, # .. 0xA97F ; Hangul Jamo Extended-A
|
||||
0xA980, # .. 0xA9DF ; Javanese
|
||||
0xA9E0, # .. 0xA9FF ; Myanmar Extended-B
|
||||
0xAA00, # .. 0xAA5F ; Cham
|
||||
0xAA60, # .. 0xAA7F ; Myanmar Extended-A
|
||||
0xAA80, # .. 0xAADF ; Tai Viet
|
||||
0xAAE0, # .. 0xAAFF ; Meetei Mayek Extensions
|
||||
0xAB00, # .. 0xAB2F ; Ethiopic Extended-A
|
||||
0xAB30, # .. 0xAB6F ; Latin Extended-E
|
||||
0xAB70, # .. 0xABBF ; Cherokee Supplement
|
||||
0xABC0, # .. 0xABFF ; Meetei Mayek
|
||||
0xAC00, # .. 0xD7AF ; Hangul Syllables
|
||||
0xD7B0, # .. 0xD7FF ; Hangul Jamo Extended-B
|
||||
0xD800, # .. 0xDB7F ; High Surrogates
|
||||
0xDB80, # .. 0xDBFF ; High Private Use Surrogates
|
||||
0xDC00, # .. 0xDFFF ; Low Surrogates
|
||||
0xE000, # .. 0xF8FF ; Private Use Area
|
||||
0xF900, # .. 0xFAFF ; CJK Compatibility Ideographs
|
||||
0xFB00, # .. 0xFB4F ; Alphabetic Presentation Forms
|
||||
0xFB50, # .. 0xFDFF ; Arabic Presentation Forms-A
|
||||
0xFE00, # .. 0xFE0F ; Variation Selectors
|
||||
0xFE10, # .. 0xFE1F ; Vertical Forms
|
||||
0xFE20, # .. 0xFE2F ; Combining Half Marks
|
||||
0xFE30, # .. 0xFE4F ; CJK Compatibility Forms
|
||||
0xFE50, # .. 0xFE6F ; Small Form Variants
|
||||
0xFE70, # .. 0xFEFF ; Arabic Presentation Forms-B
|
||||
0xFF00, # .. 0xFFEF ; Halfwidth and Fullwidth Forms
|
||||
0xFFF0, # .. 0xFFFF ; Specials
|
||||
0x10000, # .. 0x1007F ; Linear B Syllabary
|
||||
0x10080, # .. 0x100FF ; Linear B Ideograms
|
||||
0x10100, # .. 0x1013F ; Aegean Numbers
|
||||
0x10140, # .. 0x1018F ; Ancient Greek Numbers
|
||||
0x10190, # .. 0x101CF ; Ancient Symbols
|
||||
0x101D0, # .. 0x101FF ; Phaistos Disc
|
||||
0x10200, # .. 0x1027F ; No_Block
|
||||
0x10280, # .. 0x1029F ; Lycian
|
||||
0x102A0, # .. 0x102DF ; Carian
|
||||
0x102E0, # .. 0x102FF ; Coptic Epact Numbers
|
||||
0x10300, # .. 0x1032F ; Old Italic
|
||||
0x10330, # .. 0x1034F ; Gothic
|
||||
0x10350, # .. 0x1037F ; Old Permic
|
||||
0x10380, # .. 0x1039F ; Ugaritic
|
||||
0x103A0, # .. 0x103DF ; Old Persian
|
||||
0x103E0, # .. 0x103FF ; No_Block
|
||||
0x10400, # .. 0x1044F ; Deseret
|
||||
0x10450, # .. 0x1047F ; Shavian
|
||||
0x10480, # .. 0x104AF ; Osmanya
|
||||
0x104B0, # .. 0x104FF ; Osage
|
||||
0x10500, # .. 0x1052F ; Elbasan
|
||||
0x10530, # .. 0x1056F ; Caucasian Albanian
|
||||
0x10570, # .. 0x105BF ; Vithkuqi
|
||||
0x105C0, # .. 0x105FF ; Todhri
|
||||
0x10600, # .. 0x1077F ; Linear A
|
||||
0x10780, # .. 0x107BF ; Latin Extended-F
|
||||
0x107C0, # .. 0x107FF ; No_Block
|
||||
0x10800, # .. 0x1083F ; Cypriot Syllabary
|
||||
0x10840, # .. 0x1085F ; Imperial Aramaic
|
||||
0x10860, # .. 0x1087F ; Palmyrene
|
||||
0x10880, # .. 0x108AF ; Nabataean
|
||||
0x108B0, # .. 0x108DF ; No_Block
|
||||
0x108E0, # .. 0x108FF ; Hatran
|
||||
0x10900, # .. 0x1091F ; Phoenician
|
||||
0x10920, # .. 0x1093F ; Lydian
|
||||
0x10940, # .. 0x1097F ; No_Block
|
||||
0x10980, # .. 0x1099F ; Meroitic Hieroglyphs
|
||||
0x109A0, # .. 0x109FF ; Meroitic Cursive
|
||||
0x10A00, # .. 0x10A5F ; Kharoshthi
|
||||
0x10A60, # .. 0x10A7F ; Old South Arabian
|
||||
0x10A80, # .. 0x10A9F ; Old North Arabian
|
||||
0x10AA0, # .. 0x10ABF ; No_Block
|
||||
0x10AC0, # .. 0x10AFF ; Manichaean
|
||||
0x10B00, # .. 0x10B3F ; Avestan
|
||||
0x10B40, # .. 0x10B5F ; Inscriptional Parthian
|
||||
0x10B60, # .. 0x10B7F ; Inscriptional Pahlavi
|
||||
0x10B80, # .. 0x10BAF ; Psalter Pahlavi
|
||||
0x10BB0, # .. 0x10BFF ; No_Block
|
||||
0x10C00, # .. 0x10C4F ; Old Turkic
|
||||
0x10C50, # .. 0x10C7F ; No_Block
|
||||
0x10C80, # .. 0x10CFF ; Old Hungarian
|
||||
0x10D00, # .. 0x10D3F ; Hanifi Rohingya
|
||||
0x10D40, # .. 0x10D8F ; Garay
|
||||
0x10D90, # .. 0x10E5F ; No_Block
|
||||
0x10E60, # .. 0x10E7F ; Rumi Numeral Symbols
|
||||
0x10E80, # .. 0x10EBF ; Yezidi
|
||||
0x10EC0, # .. 0x10EFF ; Arabic Extended-C
|
||||
0x10F00, # .. 0x10F2F ; Old Sogdian
|
||||
0x10F30, # .. 0x10F6F ; Sogdian
|
||||
0x10F70, # .. 0x10FAF ; Old Uyghur
|
||||
0x10FB0, # .. 0x10FDF ; Chorasmian
|
||||
0x10FE0, # .. 0x10FFF ; Elymaic
|
||||
0x11000, # .. 0x1107F ; Brahmi
|
||||
0x11080, # .. 0x110CF ; Kaithi
|
||||
0x110D0, # .. 0x110FF ; Sora Sompeng
|
||||
0x11100, # .. 0x1114F ; Chakma
|
||||
0x11150, # .. 0x1117F ; Mahajani
|
||||
0x11180, # .. 0x111DF ; Sharada
|
||||
0x111E0, # .. 0x111FF ; Sinhala Archaic Numbers
|
||||
0x11200, # .. 0x1124F ; Khojki
|
||||
0x11250, # .. 0x1127F ; No_Block
|
||||
0x11280, # .. 0x112AF ; Multani
|
||||
0x112B0, # .. 0x112FF ; Khudawadi
|
||||
0x11300, # .. 0x1137F ; Grantha
|
||||
0x11380, # .. 0x113FF ; Tulu-Tigalari
|
||||
0x11400, # .. 0x1147F ; Newa
|
||||
0x11480, # .. 0x114DF ; Tirhuta
|
||||
0x114E0, # .. 0x1157F ; No_Block
|
||||
0x11580, # .. 0x115FF ; Siddham
|
||||
0x11600, # .. 0x1165F ; Modi
|
||||
0x11660, # .. 0x1167F ; Mongolian Supplement
|
||||
0x11680, # .. 0x116CF ; Takri
|
||||
0x116D0, # .. 0x116FF ; Myanmar Extended-C
|
||||
0x11700, # .. 0x1174F ; Ahom
|
||||
0x11750, # .. 0x117FF ; No_Block
|
||||
0x11800, # .. 0x1184F ; Dogra
|
||||
0x11850, # .. 0x1189F ; No_Block
|
||||
0x118A0, # .. 0x118FF ; Warang Citi
|
||||
0x11900, # .. 0x1195F ; Dives Akuru
|
||||
0x11960, # .. 0x1199F ; No_Block
|
||||
0x119A0, # .. 0x119FF ; Nandinagari
|
||||
0x11A00, # .. 0x11A4F ; Zanabazar Square
|
||||
0x11A50, # .. 0x11AAF ; Soyombo
|
||||
0x11AB0, # .. 0x11ABF ; Unified Canadian Aboriginal Syllabics Extended-A
|
||||
0x11AC0, # .. 0x11AFF ; Pau Cin Hau
|
||||
0x11B00, # .. 0x11B5F ; Devanagari Extended-A
|
||||
0x11B60, # .. 0x11BBF ; No_Block
|
||||
0x11BC0, # .. 0x11BFF ; Sunuwar
|
||||
0x11C00, # .. 0x11C6F ; Bhaiksuki
|
||||
0x11C70, # .. 0x11CBF ; Marchen
|
||||
0x11CC0, # .. 0x11CFF ; No_Block
|
||||
0x11D00, # .. 0x11D5F ; Masaram Gondi
|
||||
0x11D60, # .. 0x11DAF ; Gunjala Gondi
|
||||
0x11DB0, # .. 0x11EDF ; No_Block
|
||||
0x11EE0, # .. 0x11EFF ; Makasar
|
||||
0x11F00, # .. 0x11F5F ; Kawi
|
||||
0x11F60, # .. 0x11FAF ; No_Block
|
||||
0x11FB0, # .. 0x11FBF ; Lisu Supplement
|
||||
0x11FC0, # .. 0x11FFF ; Tamil Supplement
|
||||
0x12000, # .. 0x123FF ; Cuneiform
|
||||
0x12400, # .. 0x1247F ; Cuneiform Numbers and Punctuation
|
||||
0x12480, # .. 0x1254F ; Early Dynastic Cuneiform
|
||||
0x12550, # .. 0x12F8F ; No_Block
|
||||
0x12F90, # .. 0x12FFF ; Cypro-Minoan
|
||||
0x13000, # .. 0x1342F ; Egyptian Hieroglyphs
|
||||
0x13430, # .. 0x1345F ; Egyptian Hieroglyph Format Controls
|
||||
0x13460, # .. 0x143FF ; Egyptian Hieroglyphs Extended-A
|
||||
0x14400, # .. 0x1467F ; Anatolian Hieroglyphs
|
||||
0x14680, # .. 0x160FF ; No_Block
|
||||
0x16100, # .. 0x1613F ; Gurung Khema
|
||||
0x16140, # .. 0x167FF ; No_Block
|
||||
0x16800, # .. 0x16A3F ; Bamum Supplement
|
||||
0x16A40, # .. 0x16A6F ; Mro
|
||||
0x16A70, # .. 0x16ACF ; Tangsa
|
||||
0x16AD0, # .. 0x16AFF ; Bassa Vah
|
||||
0x16B00, # .. 0x16B8F ; Pahawh Hmong
|
||||
0x16B90, # .. 0x16D3F ; No_Block
|
||||
0x16D40, # .. 0x16D7F ; Kirat Rai
|
||||
0x16D80, # .. 0x16E3F ; No_Block
|
||||
0x16E40, # .. 0x16E9F ; Medefaidrin
|
||||
0x16EA0, # .. 0x16EFF ; No_Block
|
||||
0x16F00, # .. 0x16F9F ; Miao
|
||||
0x16FA0, # .. 0x16FDF ; No_Block
|
||||
0x16FE0, # .. 0x16FFF ; Ideographic Symbols and Punctuation
|
||||
0x17000, # .. 0x187FF ; Tangut
|
||||
0x18800, # .. 0x18AFF ; Tangut Components
|
||||
0x18B00, # .. 0x18CFF ; Khitan Small Script
|
||||
0x18D00, # .. 0x18D7F ; Tangut Supplement
|
||||
0x18D80, # .. 0x1AFEF ; No_Block
|
||||
0x1AFF0, # .. 0x1AFFF ; Kana Extended-B
|
||||
0x1B000, # .. 0x1B0FF ; Kana Supplement
|
||||
0x1B100, # .. 0x1B12F ; Kana Extended-A
|
||||
0x1B130, # .. 0x1B16F ; Small Kana Extension
|
||||
0x1B170, # .. 0x1B2FF ; Nushu
|
||||
0x1B300, # .. 0x1BBFF ; No_Block
|
||||
0x1BC00, # .. 0x1BC9F ; Duployan
|
||||
0x1BCA0, # .. 0x1BCAF ; Shorthand Format Controls
|
||||
0x1BCB0, # .. 0x1CBFF ; No_Block
|
||||
0x1CC00, # .. 0x1CEBF ; Symbols for Legacy Computing Supplement
|
||||
0x1CEC0, # .. 0x1CEFF ; No_Block
|
||||
0x1CF00, # .. 0x1CFCF ; Znamenny Musical Notation
|
||||
0x1CFD0, # .. 0x1CFFF ; No_Block
|
||||
0x1D000, # .. 0x1D0FF ; Byzantine Musical Symbols
|
||||
0x1D100, # .. 0x1D1FF ; Musical Symbols
|
||||
0x1D200, # .. 0x1D24F ; Ancient Greek Musical Notation
|
||||
0x1D250, # .. 0x1D2BF ; No_Block
|
||||
0x1D2C0, # .. 0x1D2DF ; Kaktovik Numerals
|
||||
0x1D2E0, # .. 0x1D2FF ; Mayan Numerals
|
||||
0x1D300, # .. 0x1D35F ; Tai Xuan Jing Symbols
|
||||
0x1D360, # .. 0x1D37F ; Counting Rod Numerals
|
||||
0x1D380, # .. 0x1D3FF ; No_Block
|
||||
0x1D400, # .. 0x1D7FF ; Mathematical Alphanumeric Symbols
|
||||
0x1D800, # .. 0x1DAAF ; Sutton SignWriting
|
||||
0x1DAB0, # .. 0x1DEFF ; No_Block
|
||||
0x1DF00, # .. 0x1DFFF ; Latin Extended-G
|
||||
0x1E000, # .. 0x1E02F ; Glagolitic Supplement
|
||||
0x1E030, # .. 0x1E08F ; Cyrillic Extended-D
|
||||
0x1E090, # .. 0x1E0FF ; No_Block
|
||||
0x1E100, # .. 0x1E14F ; Nyiakeng Puachue Hmong
|
||||
0x1E150, # .. 0x1E28F ; No_Block
|
||||
0x1E290, # .. 0x1E2BF ; Toto
|
||||
0x1E2C0, # .. 0x1E2FF ; Wancho
|
||||
0x1E300, # .. 0x1E4CF ; No_Block
|
||||
0x1E4D0, # .. 0x1E4FF ; Nag Mundari
|
||||
0x1E500, # .. 0x1E5CF ; No_Block
|
||||
0x1E5D0, # .. 0x1E5FF ; Ol Onal
|
||||
0x1E600, # .. 0x1E7DF ; No_Block
|
||||
0x1E7E0, # .. 0x1E7FF ; Ethiopic Extended-B
|
||||
0x1E800, # .. 0x1E8DF ; Mende Kikakui
|
||||
0x1E8E0, # .. 0x1E8FF ; No_Block
|
||||
0x1E900, # .. 0x1E95F ; Adlam
|
||||
0x1E960, # .. 0x1EC6F ; No_Block
|
||||
0x1EC70, # .. 0x1ECBF ; Indic Siyaq Numbers
|
||||
0x1ECC0, # .. 0x1ECFF ; No_Block
|
||||
0x1ED00, # .. 0x1ED4F ; Ottoman Siyaq Numbers
|
||||
0x1ED50, # .. 0x1EDFF ; No_Block
|
||||
0x1EE00, # .. 0x1EEFF ; Arabic Mathematical Alphabetic Symbols
|
||||
0x1EF00, # .. 0x1EFFF ; No_Block
|
||||
0x1F000, # .. 0x1F02F ; Mahjong Tiles
|
||||
0x1F030, # .. 0x1F09F ; Domino Tiles
|
||||
0x1F0A0, # .. 0x1F0FF ; Playing Cards
|
||||
0x1F100, # .. 0x1F1FF ; Enclosed Alphanumeric Supplement
|
||||
0x1F200, # .. 0x1F2FF ; Enclosed Ideographic Supplement
|
||||
0x1F300, # .. 0x1F5FF ; Miscellaneous Symbols and Pictographs
|
||||
0x1F600, # .. 0x1F64F ; Emoticons
|
||||
0x1F650, # .. 0x1F67F ; Ornamental Dingbats
|
||||
0x1F680, # .. 0x1F6FF ; Transport and Map Symbols
|
||||
0x1F700, # .. 0x1F77F ; Alchemical Symbols
|
||||
0x1F780, # .. 0x1F7FF ; Geometric Shapes Extended
|
||||
0x1F800, # .. 0x1F8FF ; Supplemental Arrows-C
|
||||
0x1F900, # .. 0x1F9FF ; Supplemental Symbols and Pictographs
|
||||
0x1FA00, # .. 0x1FA6F ; Chess Symbols
|
||||
0x1FA70, # .. 0x1FAFF ; Symbols and Pictographs Extended-A
|
||||
0x1FB00, # .. 0x1FBFF ; Symbols for Legacy Computing
|
||||
0x1FC00, # .. 0x1FFFF ; No_Block
|
||||
0x20000, # .. 0x2A6DF ; CJK Unified Ideographs Extension B
|
||||
0x2A6E0, # .. 0x2A6FF ; No_Block
|
||||
0x2A700, # .. 0x2B73F ; CJK Unified Ideographs Extension C
|
||||
0x2B740, # .. 0x2B81F ; CJK Unified Ideographs Extension D
|
||||
0x2B820, # .. 0x2CEAF ; CJK Unified Ideographs Extension E
|
||||
0x2CEB0, # .. 0x2EBEF ; CJK Unified Ideographs Extension F
|
||||
0x2EBF0, # .. 0x2EE5F ; CJK Unified Ideographs Extension I
|
||||
0x2EE60, # .. 0x2F7FF ; No_Block
|
||||
0x2F800, # .. 0x2FA1F ; CJK Compatibility Ideographs Supplement
|
||||
0x2FA20, # .. 0x2FFFF ; No_Block
|
||||
0x30000, # .. 0x3134F ; CJK Unified Ideographs Extension G
|
||||
0x31350, # .. 0x323AF ; CJK Unified Ideographs Extension H
|
||||
0x323B0, # .. 0xDFFFF ; No_Block
|
||||
0xE0000, # .. 0xE007F ; Tags
|
||||
0xE0080, # .. 0xE00FF ; No_Block
|
||||
0xE0100, # .. 0xE01EF ; Variation Selectors Supplement
|
||||
0xE01F0, # .. 0xEFFFF ; No_Block
|
||||
0xF0000, # .. 0xFFFFF ; Supplementary Private Use Area-A
|
||||
0x100000, # .. 0x10FFFF ; Supplementary Private Use Area-B
|
||||
]
|
||||
|
||||
VALUES = [
|
||||
"Basic Latin", # 0000..007F
|
||||
"Latin-1 Supplement", # 0080..00FF
|
||||
"Latin Extended-A", # 0100..017F
|
||||
"Latin Extended-B", # 0180..024F
|
||||
"IPA Extensions", # 0250..02AF
|
||||
"Spacing Modifier Letters", # 02B0..02FF
|
||||
"Combining Diacritical Marks", # 0300..036F
|
||||
"Greek and Coptic", # 0370..03FF
|
||||
"Cyrillic", # 0400..04FF
|
||||
"Cyrillic Supplement", # 0500..052F
|
||||
"Armenian", # 0530..058F
|
||||
"Hebrew", # 0590..05FF
|
||||
"Arabic", # 0600..06FF
|
||||
"Syriac", # 0700..074F
|
||||
"Arabic Supplement", # 0750..077F
|
||||
"Thaana", # 0780..07BF
|
||||
"NKo", # 07C0..07FF
|
||||
"Samaritan", # 0800..083F
|
||||
"Mandaic", # 0840..085F
|
||||
"Syriac Supplement", # 0860..086F
|
||||
"Arabic Extended-B", # 0870..089F
|
||||
"Arabic Extended-A", # 08A0..08FF
|
||||
"Devanagari", # 0900..097F
|
||||
"Bengali", # 0980..09FF
|
||||
"Gurmukhi", # 0A00..0A7F
|
||||
"Gujarati", # 0A80..0AFF
|
||||
"Oriya", # 0B00..0B7F
|
||||
"Tamil", # 0B80..0BFF
|
||||
"Telugu", # 0C00..0C7F
|
||||
"Kannada", # 0C80..0CFF
|
||||
"Malayalam", # 0D00..0D7F
|
||||
"Sinhala", # 0D80..0DFF
|
||||
"Thai", # 0E00..0E7F
|
||||
"Lao", # 0E80..0EFF
|
||||
"Tibetan", # 0F00..0FFF
|
||||
"Myanmar", # 1000..109F
|
||||
"Georgian", # 10A0..10FF
|
||||
"Hangul Jamo", # 1100..11FF
|
||||
"Ethiopic", # 1200..137F
|
||||
"Ethiopic Supplement", # 1380..139F
|
||||
"Cherokee", # 13A0..13FF
|
||||
"Unified Canadian Aboriginal Syllabics", # 1400..167F
|
||||
"Ogham", # 1680..169F
|
||||
"Runic", # 16A0..16FF
|
||||
"Tagalog", # 1700..171F
|
||||
"Hanunoo", # 1720..173F
|
||||
"Buhid", # 1740..175F
|
||||
"Tagbanwa", # 1760..177F
|
||||
"Khmer", # 1780..17FF
|
||||
"Mongolian", # 1800..18AF
|
||||
"Unified Canadian Aboriginal Syllabics Extended", # 18B0..18FF
|
||||
"Limbu", # 1900..194F
|
||||
"Tai Le", # 1950..197F
|
||||
"New Tai Lue", # 1980..19DF
|
||||
"Khmer Symbols", # 19E0..19FF
|
||||
"Buginese", # 1A00..1A1F
|
||||
"Tai Tham", # 1A20..1AAF
|
||||
"Combining Diacritical Marks Extended", # 1AB0..1AFF
|
||||
"Balinese", # 1B00..1B7F
|
||||
"Sundanese", # 1B80..1BBF
|
||||
"Batak", # 1BC0..1BFF
|
||||
"Lepcha", # 1C00..1C4F
|
||||
"Ol Chiki", # 1C50..1C7F
|
||||
"Cyrillic Extended-C", # 1C80..1C8F
|
||||
"Georgian Extended", # 1C90..1CBF
|
||||
"Sundanese Supplement", # 1CC0..1CCF
|
||||
"Vedic Extensions", # 1CD0..1CFF
|
||||
"Phonetic Extensions", # 1D00..1D7F
|
||||
"Phonetic Extensions Supplement", # 1D80..1DBF
|
||||
"Combining Diacritical Marks Supplement", # 1DC0..1DFF
|
||||
"Latin Extended Additional", # 1E00..1EFF
|
||||
"Greek Extended", # 1F00..1FFF
|
||||
"General Punctuation", # 2000..206F
|
||||
"Superscripts and Subscripts", # 2070..209F
|
||||
"Currency Symbols", # 20A0..20CF
|
||||
"Combining Diacritical Marks for Symbols", # 20D0..20FF
|
||||
"Letterlike Symbols", # 2100..214F
|
||||
"Number Forms", # 2150..218F
|
||||
"Arrows", # 2190..21FF
|
||||
"Mathematical Operators", # 2200..22FF
|
||||
"Miscellaneous Technical", # 2300..23FF
|
||||
"Control Pictures", # 2400..243F
|
||||
"Optical Character Recognition", # 2440..245F
|
||||
"Enclosed Alphanumerics", # 2460..24FF
|
||||
"Box Drawing", # 2500..257F
|
||||
"Block Elements", # 2580..259F
|
||||
"Geometric Shapes", # 25A0..25FF
|
||||
"Miscellaneous Symbols", # 2600..26FF
|
||||
"Dingbats", # 2700..27BF
|
||||
"Miscellaneous Mathematical Symbols-A", # 27C0..27EF
|
||||
"Supplemental Arrows-A", # 27F0..27FF
|
||||
"Braille Patterns", # 2800..28FF
|
||||
"Supplemental Arrows-B", # 2900..297F
|
||||
"Miscellaneous Mathematical Symbols-B", # 2980..29FF
|
||||
"Supplemental Mathematical Operators", # 2A00..2AFF
|
||||
"Miscellaneous Symbols and Arrows", # 2B00..2BFF
|
||||
"Glagolitic", # 2C00..2C5F
|
||||
"Latin Extended-C", # 2C60..2C7F
|
||||
"Coptic", # 2C80..2CFF
|
||||
"Georgian Supplement", # 2D00..2D2F
|
||||
"Tifinagh", # 2D30..2D7F
|
||||
"Ethiopic Extended", # 2D80..2DDF
|
||||
"Cyrillic Extended-A", # 2DE0..2DFF
|
||||
"Supplemental Punctuation", # 2E00..2E7F
|
||||
"CJK Radicals Supplement", # 2E80..2EFF
|
||||
"Kangxi Radicals", # 2F00..2FDF
|
||||
"No_Block", # 2FE0..2FEF
|
||||
"Ideographic Description Characters", # 2FF0..2FFF
|
||||
"CJK Symbols and Punctuation", # 3000..303F
|
||||
"Hiragana", # 3040..309F
|
||||
"Katakana", # 30A0..30FF
|
||||
"Bopomofo", # 3100..312F
|
||||
"Hangul Compatibility Jamo", # 3130..318F
|
||||
"Kanbun", # 3190..319F
|
||||
"Bopomofo Extended", # 31A0..31BF
|
||||
"CJK Strokes", # 31C0..31EF
|
||||
"Katakana Phonetic Extensions", # 31F0..31FF
|
||||
"Enclosed CJK Letters and Months", # 3200..32FF
|
||||
"CJK Compatibility", # 3300..33FF
|
||||
"CJK Unified Ideographs Extension A", # 3400..4DBF
|
||||
"Yijing Hexagram Symbols", # 4DC0..4DFF
|
||||
"CJK Unified Ideographs", # 4E00..9FFF
|
||||
"Yi Syllables", # A000..A48F
|
||||
"Yi Radicals", # A490..A4CF
|
||||
"Lisu", # A4D0..A4FF
|
||||
"Vai", # A500..A63F
|
||||
"Cyrillic Extended-B", # A640..A69F
|
||||
"Bamum", # A6A0..A6FF
|
||||
"Modifier Tone Letters", # A700..A71F
|
||||
"Latin Extended-D", # A720..A7FF
|
||||
"Syloti Nagri", # A800..A82F
|
||||
"Common Indic Number Forms", # A830..A83F
|
||||
"Phags-pa", # A840..A87F
|
||||
"Saurashtra", # A880..A8DF
|
||||
"Devanagari Extended", # A8E0..A8FF
|
||||
"Kayah Li", # A900..A92F
|
||||
"Rejang", # A930..A95F
|
||||
"Hangul Jamo Extended-A", # A960..A97F
|
||||
"Javanese", # A980..A9DF
|
||||
"Myanmar Extended-B", # A9E0..A9FF
|
||||
"Cham", # AA00..AA5F
|
||||
"Myanmar Extended-A", # AA60..AA7F
|
||||
"Tai Viet", # AA80..AADF
|
||||
"Meetei Mayek Extensions", # AAE0..AAFF
|
||||
"Ethiopic Extended-A", # AB00..AB2F
|
||||
"Latin Extended-E", # AB30..AB6F
|
||||
"Cherokee Supplement", # AB70..ABBF
|
||||
"Meetei Mayek", # ABC0..ABFF
|
||||
"Hangul Syllables", # AC00..D7AF
|
||||
"Hangul Jamo Extended-B", # D7B0..D7FF
|
||||
"High Surrogates", # D800..DB7F
|
||||
"High Private Use Surrogates", # DB80..DBFF
|
||||
"Low Surrogates", # DC00..DFFF
|
||||
"Private Use Area", # E000..F8FF
|
||||
"CJK Compatibility Ideographs", # F900..FAFF
|
||||
"Alphabetic Presentation Forms", # FB00..FB4F
|
||||
"Arabic Presentation Forms-A", # FB50..FDFF
|
||||
"Variation Selectors", # FE00..FE0F
|
||||
"Vertical Forms", # FE10..FE1F
|
||||
"Combining Half Marks", # FE20..FE2F
|
||||
"CJK Compatibility Forms", # FE30..FE4F
|
||||
"Small Form Variants", # FE50..FE6F
|
||||
"Arabic Presentation Forms-B", # FE70..FEFF
|
||||
"Halfwidth and Fullwidth Forms", # FF00..FFEF
|
||||
"Specials", # FFF0..FFFF
|
||||
"Linear B Syllabary", # 10000..1007F
|
||||
"Linear B Ideograms", # 10080..100FF
|
||||
"Aegean Numbers", # 10100..1013F
|
||||
"Ancient Greek Numbers", # 10140..1018F
|
||||
"Ancient Symbols", # 10190..101CF
|
||||
"Phaistos Disc", # 101D0..101FF
|
||||
"No_Block", # 10200..1027F
|
||||
"Lycian", # 10280..1029F
|
||||
"Carian", # 102A0..102DF
|
||||
"Coptic Epact Numbers", # 102E0..102FF
|
||||
"Old Italic", # 10300..1032F
|
||||
"Gothic", # 10330..1034F
|
||||
"Old Permic", # 10350..1037F
|
||||
"Ugaritic", # 10380..1039F
|
||||
"Old Persian", # 103A0..103DF
|
||||
"No_Block", # 103E0..103FF
|
||||
"Deseret", # 10400..1044F
|
||||
"Shavian", # 10450..1047F
|
||||
"Osmanya", # 10480..104AF
|
||||
"Osage", # 104B0..104FF
|
||||
"Elbasan", # 10500..1052F
|
||||
"Caucasian Albanian", # 10530..1056F
|
||||
"Vithkuqi", # 10570..105BF
|
||||
"Todhri", # 105C0..105FF
|
||||
"Linear A", # 10600..1077F
|
||||
"Latin Extended-F", # 10780..107BF
|
||||
"No_Block", # 107C0..107FF
|
||||
"Cypriot Syllabary", # 10800..1083F
|
||||
"Imperial Aramaic", # 10840..1085F
|
||||
"Palmyrene", # 10860..1087F
|
||||
"Nabataean", # 10880..108AF
|
||||
"No_Block", # 108B0..108DF
|
||||
"Hatran", # 108E0..108FF
|
||||
"Phoenician", # 10900..1091F
|
||||
"Lydian", # 10920..1093F
|
||||
"No_Block", # 10940..1097F
|
||||
"Meroitic Hieroglyphs", # 10980..1099F
|
||||
"Meroitic Cursive", # 109A0..109FF
|
||||
"Kharoshthi", # 10A00..10A5F
|
||||
"Old South Arabian", # 10A60..10A7F
|
||||
"Old North Arabian", # 10A80..10A9F
|
||||
"No_Block", # 10AA0..10ABF
|
||||
"Manichaean", # 10AC0..10AFF
|
||||
"Avestan", # 10B00..10B3F
|
||||
"Inscriptional Parthian", # 10B40..10B5F
|
||||
"Inscriptional Pahlavi", # 10B60..10B7F
|
||||
"Psalter Pahlavi", # 10B80..10BAF
|
||||
"No_Block", # 10BB0..10BFF
|
||||
"Old Turkic", # 10C00..10C4F
|
||||
"No_Block", # 10C50..10C7F
|
||||
"Old Hungarian", # 10C80..10CFF
|
||||
"Hanifi Rohingya", # 10D00..10D3F
|
||||
"Garay", # 10D40..10D8F
|
||||
"No_Block", # 10D90..10E5F
|
||||
"Rumi Numeral Symbols", # 10E60..10E7F
|
||||
"Yezidi", # 10E80..10EBF
|
||||
"Arabic Extended-C", # 10EC0..10EFF
|
||||
"Old Sogdian", # 10F00..10F2F
|
||||
"Sogdian", # 10F30..10F6F
|
||||
"Old Uyghur", # 10F70..10FAF
|
||||
"Chorasmian", # 10FB0..10FDF
|
||||
"Elymaic", # 10FE0..10FFF
|
||||
"Brahmi", # 11000..1107F
|
||||
"Kaithi", # 11080..110CF
|
||||
"Sora Sompeng", # 110D0..110FF
|
||||
"Chakma", # 11100..1114F
|
||||
"Mahajani", # 11150..1117F
|
||||
"Sharada", # 11180..111DF
|
||||
"Sinhala Archaic Numbers", # 111E0..111FF
|
||||
"Khojki", # 11200..1124F
|
||||
"No_Block", # 11250..1127F
|
||||
"Multani", # 11280..112AF
|
||||
"Khudawadi", # 112B0..112FF
|
||||
"Grantha", # 11300..1137F
|
||||
"Tulu-Tigalari", # 11380..113FF
|
||||
"Newa", # 11400..1147F
|
||||
"Tirhuta", # 11480..114DF
|
||||
"No_Block", # 114E0..1157F
|
||||
"Siddham", # 11580..115FF
|
||||
"Modi", # 11600..1165F
|
||||
"Mongolian Supplement", # 11660..1167F
|
||||
"Takri", # 11680..116CF
|
||||
"Myanmar Extended-C", # 116D0..116FF
|
||||
"Ahom", # 11700..1174F
|
||||
"No_Block", # 11750..117FF
|
||||
"Dogra", # 11800..1184F
|
||||
"No_Block", # 11850..1189F
|
||||
"Warang Citi", # 118A0..118FF
|
||||
"Dives Akuru", # 11900..1195F
|
||||
"No_Block", # 11960..1199F
|
||||
"Nandinagari", # 119A0..119FF
|
||||
"Zanabazar Square", # 11A00..11A4F
|
||||
"Soyombo", # 11A50..11AAF
|
||||
"Unified Canadian Aboriginal Syllabics Extended-A", # 11AB0..11ABF
|
||||
"Pau Cin Hau", # 11AC0..11AFF
|
||||
"Devanagari Extended-A", # 11B00..11B5F
|
||||
"No_Block", # 11B60..11BBF
|
||||
"Sunuwar", # 11BC0..11BFF
|
||||
"Bhaiksuki", # 11C00..11C6F
|
||||
"Marchen", # 11C70..11CBF
|
||||
"No_Block", # 11CC0..11CFF
|
||||
"Masaram Gondi", # 11D00..11D5F
|
||||
"Gunjala Gondi", # 11D60..11DAF
|
||||
"No_Block", # 11DB0..11EDF
|
||||
"Makasar", # 11EE0..11EFF
|
||||
"Kawi", # 11F00..11F5F
|
||||
"No_Block", # 11F60..11FAF
|
||||
"Lisu Supplement", # 11FB0..11FBF
|
||||
"Tamil Supplement", # 11FC0..11FFF
|
||||
"Cuneiform", # 12000..123FF
|
||||
"Cuneiform Numbers and Punctuation", # 12400..1247F
|
||||
"Early Dynastic Cuneiform", # 12480..1254F
|
||||
"No_Block", # 12550..12F8F
|
||||
"Cypro-Minoan", # 12F90..12FFF
|
||||
"Egyptian Hieroglyphs", # 13000..1342F
|
||||
"Egyptian Hieroglyph Format Controls", # 13430..1345F
|
||||
"Egyptian Hieroglyphs Extended-A", # 13460..143FF
|
||||
"Anatolian Hieroglyphs", # 14400..1467F
|
||||
"No_Block", # 14680..160FF
|
||||
"Gurung Khema", # 16100..1613F
|
||||
"No_Block", # 16140..167FF
|
||||
"Bamum Supplement", # 16800..16A3F
|
||||
"Mro", # 16A40..16A6F
|
||||
"Tangsa", # 16A70..16ACF
|
||||
"Bassa Vah", # 16AD0..16AFF
|
||||
"Pahawh Hmong", # 16B00..16B8F
|
||||
"No_Block", # 16B90..16D3F
|
||||
"Kirat Rai", # 16D40..16D7F
|
||||
"No_Block", # 16D80..16E3F
|
||||
"Medefaidrin", # 16E40..16E9F
|
||||
"No_Block", # 16EA0..16EFF
|
||||
"Miao", # 16F00..16F9F
|
||||
"No_Block", # 16FA0..16FDF
|
||||
"Ideographic Symbols and Punctuation", # 16FE0..16FFF
|
||||
"Tangut", # 17000..187FF
|
||||
"Tangut Components", # 18800..18AFF
|
||||
"Khitan Small Script", # 18B00..18CFF
|
||||
"Tangut Supplement", # 18D00..18D7F
|
||||
"No_Block", # 18D80..1AFEF
|
||||
"Kana Extended-B", # 1AFF0..1AFFF
|
||||
"Kana Supplement", # 1B000..1B0FF
|
||||
"Kana Extended-A", # 1B100..1B12F
|
||||
"Small Kana Extension", # 1B130..1B16F
|
||||
"Nushu", # 1B170..1B2FF
|
||||
"No_Block", # 1B300..1BBFF
|
||||
"Duployan", # 1BC00..1BC9F
|
||||
"Shorthand Format Controls", # 1BCA0..1BCAF
|
||||
"No_Block", # 1BCB0..1CBFF
|
||||
"Symbols for Legacy Computing Supplement", # 1CC00..1CEBF
|
||||
"No_Block", # 1CEC0..1CEFF
|
||||
"Znamenny Musical Notation", # 1CF00..1CFCF
|
||||
"No_Block", # 1CFD0..1CFFF
|
||||
"Byzantine Musical Symbols", # 1D000..1D0FF
|
||||
"Musical Symbols", # 1D100..1D1FF
|
||||
"Ancient Greek Musical Notation", # 1D200..1D24F
|
||||
"No_Block", # 1D250..1D2BF
|
||||
"Kaktovik Numerals", # 1D2C0..1D2DF
|
||||
"Mayan Numerals", # 1D2E0..1D2FF
|
||||
"Tai Xuan Jing Symbols", # 1D300..1D35F
|
||||
"Counting Rod Numerals", # 1D360..1D37F
|
||||
"No_Block", # 1D380..1D3FF
|
||||
"Mathematical Alphanumeric Symbols", # 1D400..1D7FF
|
||||
"Sutton SignWriting", # 1D800..1DAAF
|
||||
"No_Block", # 1DAB0..1DEFF
|
||||
"Latin Extended-G", # 1DF00..1DFFF
|
||||
"Glagolitic Supplement", # 1E000..1E02F
|
||||
"Cyrillic Extended-D", # 1E030..1E08F
|
||||
"No_Block", # 1E090..1E0FF
|
||||
"Nyiakeng Puachue Hmong", # 1E100..1E14F
|
||||
"No_Block", # 1E150..1E28F
|
||||
"Toto", # 1E290..1E2BF
|
||||
"Wancho", # 1E2C0..1E2FF
|
||||
"No_Block", # 1E300..1E4CF
|
||||
"Nag Mundari", # 1E4D0..1E4FF
|
||||
"No_Block", # 1E500..1E5CF
|
||||
"Ol Onal", # 1E5D0..1E5FF
|
||||
"No_Block", # 1E600..1E7DF
|
||||
"Ethiopic Extended-B", # 1E7E0..1E7FF
|
||||
"Mende Kikakui", # 1E800..1E8DF
|
||||
"No_Block", # 1E8E0..1E8FF
|
||||
"Adlam", # 1E900..1E95F
|
||||
"No_Block", # 1E960..1EC6F
|
||||
"Indic Siyaq Numbers", # 1EC70..1ECBF
|
||||
"No_Block", # 1ECC0..1ECFF
|
||||
"Ottoman Siyaq Numbers", # 1ED00..1ED4F
|
||||
"No_Block", # 1ED50..1EDFF
|
||||
"Arabic Mathematical Alphabetic Symbols", # 1EE00..1EEFF
|
||||
"No_Block", # 1EF00..1EFFF
|
||||
"Mahjong Tiles", # 1F000..1F02F
|
||||
"Domino Tiles", # 1F030..1F09F
|
||||
"Playing Cards", # 1F0A0..1F0FF
|
||||
"Enclosed Alphanumeric Supplement", # 1F100..1F1FF
|
||||
"Enclosed Ideographic Supplement", # 1F200..1F2FF
|
||||
"Miscellaneous Symbols and Pictographs", # 1F300..1F5FF
|
||||
"Emoticons", # 1F600..1F64F
|
||||
"Ornamental Dingbats", # 1F650..1F67F
|
||||
"Transport and Map Symbols", # 1F680..1F6FF
|
||||
"Alchemical Symbols", # 1F700..1F77F
|
||||
"Geometric Shapes Extended", # 1F780..1F7FF
|
||||
"Supplemental Arrows-C", # 1F800..1F8FF
|
||||
"Supplemental Symbols and Pictographs", # 1F900..1F9FF
|
||||
"Chess Symbols", # 1FA00..1FA6F
|
||||
"Symbols and Pictographs Extended-A", # 1FA70..1FAFF
|
||||
"Symbols for Legacy Computing", # 1FB00..1FBFF
|
||||
"No_Block", # 1FC00..1FFFF
|
||||
"CJK Unified Ideographs Extension B", # 20000..2A6DF
|
||||
"No_Block", # 2A6E0..2A6FF
|
||||
"CJK Unified Ideographs Extension C", # 2A700..2B73F
|
||||
"CJK Unified Ideographs Extension D", # 2B740..2B81F
|
||||
"CJK Unified Ideographs Extension E", # 2B820..2CEAF
|
||||
"CJK Unified Ideographs Extension F", # 2CEB0..2EBEF
|
||||
"CJK Unified Ideographs Extension I", # 2EBF0..2EE5F
|
||||
"No_Block", # 2EE60..2F7FF
|
||||
"CJK Compatibility Ideographs Supplement", # 2F800..2FA1F
|
||||
"No_Block", # 2FA20..2FFFF
|
||||
"CJK Unified Ideographs Extension G", # 30000..3134F
|
||||
"CJK Unified Ideographs Extension H", # 31350..323AF
|
||||
"No_Block", # 323B0..DFFFF
|
||||
"Tags", # E0000..E007F
|
||||
"No_Block", # E0080..E00FF
|
||||
"Variation Selectors Supplement", # E0100..E01EF
|
||||
"No_Block", # E01F0..EFFFF
|
||||
"Supplementary Private Use Area-A", # F0000..FFFFF
|
||||
"Supplementary Private Use Area-B", # 100000..10FFFF
|
||||
]
|
||||
@ -0,0 +1,50 @@
|
||||
# Data updated to OpenType 1.8.2 as of January 2018.
|
||||
|
||||
# Complete list of OpenType script tags at:
|
||||
# https://www.microsoft.com/typography/otspec/scripttags.htm
|
||||
|
||||
# Most of the script tags are the same as the ISO 15924 tag but lowercased,
|
||||
# so we only have to handle the exceptional cases:
|
||||
# - KATAKANA and HIRAGANA both map to 'kana';
|
||||
# - spaces at the end are preserved, unlike ISO 15924;
|
||||
# - we map special script codes for Inherited, Common and Unknown to DFLT.
|
||||
|
||||
DEFAULT_SCRIPT = "DFLT"
|
||||
|
||||
SCRIPT_ALIASES = {
|
||||
"jamo": "hang",
|
||||
}
|
||||
|
||||
SCRIPT_EXCEPTIONS = {
|
||||
"Hira": "kana",
|
||||
"Hrkt": "kana",
|
||||
"Laoo": "lao ",
|
||||
"Yiii": "yi ",
|
||||
"Nkoo": "nko ",
|
||||
"Vaii": "vai ",
|
||||
"Zmth": "math",
|
||||
"Zinh": DEFAULT_SCRIPT,
|
||||
"Zyyy": DEFAULT_SCRIPT,
|
||||
"Zzzz": DEFAULT_SCRIPT,
|
||||
}
|
||||
|
||||
SCRIPT_EXCEPTIONS_REVERSED = {
|
||||
"math": "Zmth",
|
||||
}
|
||||
|
||||
NEW_SCRIPT_TAGS = {
|
||||
"Beng": ("bng2",),
|
||||
"Deva": ("dev2",),
|
||||
"Gujr": ("gjr2",),
|
||||
"Guru": ("gur2",),
|
||||
"Knda": ("knd2",),
|
||||
"Mlym": ("mlm2",),
|
||||
"Orya": ("ory2",),
|
||||
"Taml": ("tml2",),
|
||||
"Telu": ("tel2",),
|
||||
"Mymr": ("mym2",),
|
||||
}
|
||||
|
||||
NEW_SCRIPT_TAGS_REVERSED = {
|
||||
value: key for key, values in NEW_SCRIPT_TAGS.items() for value in values
|
||||
}
|
||||
@ -0,0 +1,806 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# NOTE: This file was auto-generated with MetaTools/buildUCD.py.
|
||||
# Source: https://unicode.org/Public/UNIDATA/ScriptExtensions.txt
|
||||
# License: http://unicode.org/copyright.html#License
|
||||
#
|
||||
# ScriptExtensions-16.0.0.txt
|
||||
# Date: 2024-07-30, 19:38:00 GMT
|
||||
# © 2024 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see https://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# The Script_Extensions property indicates which characters are commonly used
|
||||
# with more than one script, but with a limited number of scripts.
|
||||
# For each code point, there is one or more property values. Each such value is a Script property value.
|
||||
# For more information, see:
|
||||
# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
|
||||
# Especially the sections:
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
|
||||
#
|
||||
# Each Script_Extensions value in this file consists of a set
|
||||
# of one or more abbreviated Script property values. The ordering of the
|
||||
# values in that set is not material, but for stability in presentation
|
||||
# it is given here as alphabetical.
|
||||
#
|
||||
# All code points not explicitly listed for Script_Extensions
|
||||
# have as their value the corresponding Script property value.
|
||||
#
|
||||
# @missing: 0000..10FFFF; <script>
|
||||
|
||||
|
||||
RANGES = [
|
||||
0x0000, # .. 0x02BB ; None
|
||||
0x02BC, # .. 0x02BC ; {'Beng', 'Cyrl', 'Deva', 'Latn', 'Lisu', 'Thai', 'Toto'}
|
||||
0x02BD, # .. 0x02C6 ; None
|
||||
0x02C7, # .. 0x02C7 ; {'Bopo', 'Latn'}
|
||||
0x02C8, # .. 0x02C8 ; None
|
||||
0x02C9, # .. 0x02CB ; {'Bopo', 'Latn'}
|
||||
0x02CC, # .. 0x02CC ; None
|
||||
0x02CD, # .. 0x02CD ; {'Latn', 'Lisu'}
|
||||
0x02CE, # .. 0x02D6 ; None
|
||||
0x02D7, # .. 0x02D7 ; {'Latn', 'Thai'}
|
||||
0x02D8, # .. 0x02D8 ; None
|
||||
0x02D9, # .. 0x02D9 ; {'Bopo', 'Latn'}
|
||||
0x02DA, # .. 0x02FF ; None
|
||||
0x0300, # .. 0x0300 ; {'Cher', 'Copt', 'Cyrl', 'Grek', 'Latn', 'Perm', 'Sunu', 'Tale'}
|
||||
0x0301, # .. 0x0301 ; {'Cher', 'Cyrl', 'Grek', 'Latn', 'Osge', 'Sunu', 'Tale', 'Todr'}
|
||||
0x0302, # .. 0x0302 ; {'Cher', 'Cyrl', 'Latn', 'Tfng'}
|
||||
0x0303, # .. 0x0303 ; {'Glag', 'Latn', 'Sunu', 'Syrc', 'Thai'}
|
||||
0x0304, # .. 0x0304 ; {'Aghb', 'Cher', 'Copt', 'Cyrl', 'Goth', 'Grek', 'Latn', 'Osge', 'Syrc', 'Tfng', 'Todr'}
|
||||
0x0305, # .. 0x0305 ; {'Copt', 'Elba', 'Glag', 'Goth', 'Kana', 'Latn'}
|
||||
0x0306, # .. 0x0306 ; {'Cyrl', 'Grek', 'Latn', 'Perm'}
|
||||
0x0307, # .. 0x0307 ; {'Copt', 'Dupl', 'Hebr', 'Latn', 'Perm', 'Syrc', 'Tale', 'Tfng', 'Todr'}
|
||||
0x0308, # .. 0x0308 ; {'Armn', 'Cyrl', 'Dupl', 'Goth', 'Grek', 'Hebr', 'Latn', 'Perm', 'Syrc', 'Tale'}
|
||||
0x0309, # .. 0x0309 ; {'Latn', 'Tfng'}
|
||||
0x030A, # .. 0x030A ; {'Dupl', 'Latn', 'Syrc'}
|
||||
0x030B, # .. 0x030B ; {'Cher', 'Cyrl', 'Latn', 'Osge'}
|
||||
0x030C, # .. 0x030C ; {'Cher', 'Latn', 'Tale'}
|
||||
0x030D, # .. 0x030D ; {'Latn', 'Sunu'}
|
||||
0x030E, # .. 0x030E ; {'Ethi', 'Latn'}
|
||||
0x030F, # .. 0x030F ; None
|
||||
0x0310, # .. 0x0310 ; {'Latn', 'Sunu'}
|
||||
0x0311, # .. 0x0311 ; {'Cyrl', 'Latn', 'Todr'}
|
||||
0x0312, # .. 0x0312 ; None
|
||||
0x0313, # .. 0x0313 ; {'Grek', 'Latn', 'Perm', 'Todr'}
|
||||
0x0314, # .. 0x031F ; None
|
||||
0x0320, # .. 0x0320 ; {'Latn', 'Syrc'}
|
||||
0x0321, # .. 0x0322 ; None
|
||||
0x0323, # .. 0x0323 ; {'Cher', 'Dupl', 'Kana', 'Latn', 'Syrc'}
|
||||
0x0324, # .. 0x0324 ; {'Cher', 'Dupl', 'Latn', 'Syrc'}
|
||||
0x0325, # .. 0x0325 ; {'Latn', 'Syrc'}
|
||||
0x0326, # .. 0x032C ; None
|
||||
0x032D, # .. 0x032D ; {'Latn', 'Sunu', 'Syrc'}
|
||||
0x032E, # .. 0x032E ; {'Latn', 'Syrc'}
|
||||
0x032F, # .. 0x032F ; None
|
||||
0x0330, # .. 0x0330 ; {'Cher', 'Latn', 'Syrc'}
|
||||
0x0331, # .. 0x0331 ; {'Aghb', 'Cher', 'Goth', 'Latn', 'Sunu', 'Thai'}
|
||||
0x0332, # .. 0x0341 ; None
|
||||
0x0342, # .. 0x0342 ; {'Grek'}
|
||||
0x0343, # .. 0x0344 ; None
|
||||
0x0345, # .. 0x0345 ; {'Grek'}
|
||||
0x0346, # .. 0x0357 ; None
|
||||
0x0358, # .. 0x0358 ; {'Latn', 'Osge'}
|
||||
0x0359, # .. 0x035D ; None
|
||||
0x035E, # .. 0x035E ; {'Aghb', 'Latn', 'Todr'}
|
||||
0x035F, # .. 0x0362 ; None
|
||||
0x0363, # .. 0x036F ; {'Latn'}
|
||||
0x0370, # .. 0x0373 ; None
|
||||
0x0374, # .. 0x0375 ; {'Copt', 'Grek'}
|
||||
0x0376, # .. 0x0482 ; None
|
||||
0x0483, # .. 0x0483 ; {'Cyrl', 'Perm'}
|
||||
0x0484, # .. 0x0484 ; {'Cyrl', 'Glag'}
|
||||
0x0485, # .. 0x0486 ; {'Cyrl', 'Latn'}
|
||||
0x0487, # .. 0x0487 ; {'Cyrl', 'Glag'}
|
||||
0x0488, # .. 0x0588 ; None
|
||||
0x0589, # .. 0x0589 ; {'Armn', 'Geor', 'Glag'}
|
||||
0x058A, # .. 0x060B ; None
|
||||
0x060C, # .. 0x060C ; {'Arab', 'Gara', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}
|
||||
0x060D, # .. 0x061A ; None
|
||||
0x061B, # .. 0x061B ; {'Arab', 'Gara', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}
|
||||
0x061C, # .. 0x061C ; {'Arab', 'Syrc', 'Thaa'}
|
||||
0x061D, # .. 0x061E ; None
|
||||
0x061F, # .. 0x061F ; {'Adlm', 'Arab', 'Gara', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}
|
||||
0x0620, # .. 0x063F ; None
|
||||
0x0640, # .. 0x0640 ; {'Adlm', 'Arab', 'Mand', 'Mani', 'Ougr', 'Phlp', 'Rohg', 'Sogd', 'Syrc'}
|
||||
0x0641, # .. 0x064A ; None
|
||||
0x064B, # .. 0x0655 ; {'Arab', 'Syrc'}
|
||||
0x0656, # .. 0x065F ; None
|
||||
0x0660, # .. 0x0669 ; {'Arab', 'Thaa', 'Yezi'}
|
||||
0x066A, # .. 0x066F ; None
|
||||
0x0670, # .. 0x0670 ; {'Arab', 'Syrc'}
|
||||
0x0671, # .. 0x06D3 ; None
|
||||
0x06D4, # .. 0x06D4 ; {'Arab', 'Rohg'}
|
||||
0x06D5, # .. 0x0950 ; None
|
||||
0x0951, # .. 0x0951 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Shrd', 'Taml', 'Telu', 'Tirh'}
|
||||
0x0952, # .. 0x0952 ; {'Beng', 'Deva', 'Gran', 'Gujr', 'Guru', 'Knda', 'Latn', 'Mlym', 'Orya', 'Taml', 'Telu', 'Tirh'}
|
||||
0x0953, # .. 0x0963 ; None
|
||||
0x0964, # .. 0x0964 ; {'Beng', 'Deva', 'Dogr', 'Gong', 'Gonm', 'Gran', 'Gujr', 'Guru', 'Knda', 'Mahj', 'Mlym', 'Nand', 'Onao', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}
|
||||
0x0965, # .. 0x0965 ; {'Beng', 'Deva', 'Dogr', 'Gong', 'Gonm', 'Gran', 'Gujr', 'Gukh', 'Guru', 'Knda', 'Limb', 'Mahj', 'Mlym', 'Nand', 'Onao', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', 'Telu', 'Tirh'}
|
||||
0x0966, # .. 0x096F ; {'Deva', 'Dogr', 'Kthi', 'Mahj'}
|
||||
0x0970, # .. 0x09E5 ; None
|
||||
0x09E6, # .. 0x09EF ; {'Beng', 'Cakm', 'Sylo'}
|
||||
0x09F0, # .. 0x0A65 ; None
|
||||
0x0A66, # .. 0x0A6F ; {'Guru', 'Mult'}
|
||||
0x0A70, # .. 0x0AE5 ; None
|
||||
0x0AE6, # .. 0x0AEF ; {'Gujr', 'Khoj'}
|
||||
0x0AF0, # .. 0x0BE5 ; None
|
||||
0x0BE6, # .. 0x0BF3 ; {'Gran', 'Taml'}
|
||||
0x0BF4, # .. 0x0CE5 ; None
|
||||
0x0CE6, # .. 0x0CEF ; {'Knda', 'Nand', 'Tutg'}
|
||||
0x0CF0, # .. 0x103F ; None
|
||||
0x1040, # .. 0x1049 ; {'Cakm', 'Mymr', 'Tale'}
|
||||
0x104A, # .. 0x10FA ; None
|
||||
0x10FB, # .. 0x10FB ; {'Geor', 'Glag', 'Latn'}
|
||||
0x10FC, # .. 0x16EA ; None
|
||||
0x16EB, # .. 0x16ED ; {'Runr'}
|
||||
0x16EE, # .. 0x1734 ; None
|
||||
0x1735, # .. 0x1736 ; {'Buhd', 'Hano', 'Tagb', 'Tglg'}
|
||||
0x1737, # .. 0x1801 ; None
|
||||
0x1802, # .. 0x1803 ; {'Mong', 'Phag'}
|
||||
0x1804, # .. 0x1804 ; None
|
||||
0x1805, # .. 0x1805 ; {'Mong', 'Phag'}
|
||||
0x1806, # .. 0x1CCF ; None
|
||||
0x1CD0, # .. 0x1CD0 ; {'Beng', 'Deva', 'Gran', 'Knda'}
|
||||
0x1CD1, # .. 0x1CD1 ; {'Deva'}
|
||||
0x1CD2, # .. 0x1CD2 ; {'Beng', 'Deva', 'Gran', 'Knda'}
|
||||
0x1CD3, # .. 0x1CD3 ; {'Deva', 'Gran', 'Knda'}
|
||||
0x1CD4, # .. 0x1CD4 ; {'Deva'}
|
||||
0x1CD5, # .. 0x1CD6 ; {'Beng', 'Deva'}
|
||||
0x1CD7, # .. 0x1CD7 ; {'Deva', 'Shrd'}
|
||||
0x1CD8, # .. 0x1CD8 ; {'Beng', 'Deva'}
|
||||
0x1CD9, # .. 0x1CD9 ; {'Deva', 'Shrd'}
|
||||
0x1CDA, # .. 0x1CDA ; {'Deva', 'Knda', 'Mlym', 'Orya', 'Taml', 'Telu'}
|
||||
0x1CDB, # .. 0x1CDB ; {'Deva'}
|
||||
0x1CDC, # .. 0x1CDD ; {'Deva', 'Shrd'}
|
||||
0x1CDE, # .. 0x1CDF ; {'Deva'}
|
||||
0x1CE0, # .. 0x1CE0 ; {'Deva', 'Shrd'}
|
||||
0x1CE1, # .. 0x1CE1 ; {'Beng', 'Deva'}
|
||||
0x1CE2, # .. 0x1CE8 ; {'Deva'}
|
||||
0x1CE9, # .. 0x1CE9 ; {'Deva', 'Nand'}
|
||||
0x1CEA, # .. 0x1CEA ; {'Beng', 'Deva'}
|
||||
0x1CEB, # .. 0x1CEC ; {'Deva'}
|
||||
0x1CED, # .. 0x1CED ; {'Beng', 'Deva'}
|
||||
0x1CEE, # .. 0x1CF1 ; {'Deva'}
|
||||
0x1CF2, # .. 0x1CF2 ; {'Beng', 'Deva', 'Gran', 'Knda', 'Mlym', 'Nand', 'Orya', 'Sinh', 'Telu', 'Tirh', 'Tutg'}
|
||||
0x1CF3, # .. 0x1CF3 ; {'Deva', 'Gran'}
|
||||
0x1CF4, # .. 0x1CF4 ; {'Deva', 'Gran', 'Knda', 'Tutg'}
|
||||
0x1CF5, # .. 0x1CF6 ; {'Beng', 'Deva'}
|
||||
0x1CF7, # .. 0x1CF7 ; {'Beng'}
|
||||
0x1CF8, # .. 0x1CF9 ; {'Deva', 'Gran'}
|
||||
0x1CFA, # .. 0x1CFA ; {'Nand'}
|
||||
0x1CFB, # .. 0x1DBF ; None
|
||||
0x1DC0, # .. 0x1DC1 ; {'Grek'}
|
||||
0x1DC2, # .. 0x1DF7 ; None
|
||||
0x1DF8, # .. 0x1DF8 ; {'Cyrl', 'Latn', 'Syrc'}
|
||||
0x1DF9, # .. 0x1DF9 ; None
|
||||
0x1DFA, # .. 0x1DFA ; {'Syrc'}
|
||||
0x1DFB, # .. 0x202E ; None
|
||||
0x202F, # .. 0x202F ; {'Latn', 'Mong', 'Phag'}
|
||||
0x2030, # .. 0x204E ; None
|
||||
0x204F, # .. 0x204F ; {'Adlm', 'Arab'}
|
||||
0x2050, # .. 0x2059 ; None
|
||||
0x205A, # .. 0x205A ; {'Cari', 'Geor', 'Glag', 'Hung', 'Lyci', 'Orkh'}
|
||||
0x205B, # .. 0x205C ; None
|
||||
0x205D, # .. 0x205D ; {'Cari', 'Grek', 'Hung', 'Mero'}
|
||||
0x205E, # .. 0x20EF ; None
|
||||
0x20F0, # .. 0x20F0 ; {'Deva', 'Gran', 'Latn'}
|
||||
0x20F1, # .. 0x2E16 ; None
|
||||
0x2E17, # .. 0x2E17 ; {'Copt', 'Latn'}
|
||||
0x2E18, # .. 0x2E2F ; None
|
||||
0x2E30, # .. 0x2E30 ; {'Avst', 'Orkh'}
|
||||
0x2E31, # .. 0x2E31 ; {'Avst', 'Cari', 'Geor', 'Hung', 'Kthi', 'Lydi', 'Samr'}
|
||||
0x2E32, # .. 0x2E3B ; None
|
||||
0x2E3C, # .. 0x2E3C ; {'Dupl'}
|
||||
0x2E3D, # .. 0x2E40 ; None
|
||||
0x2E41, # .. 0x2E41 ; {'Adlm', 'Arab', 'Hung'}
|
||||
0x2E42, # .. 0x2E42 ; None
|
||||
0x2E43, # .. 0x2E43 ; {'Cyrl', 'Glag'}
|
||||
0x2E44, # .. 0x2FEF ; None
|
||||
0x2FF0, # .. 0x2FFF ; {'Hani', 'Tang'}
|
||||
0x3000, # .. 0x3000 ; None
|
||||
0x3001, # .. 0x3001 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Mong', 'Yiii'}
|
||||
0x3002, # .. 0x3002 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Mong', 'Phag', 'Yiii'}
|
||||
0x3003, # .. 0x3003 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||
0x3004, # .. 0x3005 ; None
|
||||
0x3006, # .. 0x3006 ; {'Hani'}
|
||||
0x3007, # .. 0x3007 ; None
|
||||
0x3008, # .. 0x3009 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Mong', 'Tibt', 'Yiii'}
|
||||
0x300A, # .. 0x300B ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Lisu', 'Mong', 'Tibt', 'Yiii'}
|
||||
0x300C, # .. 0x3011 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||
0x3012, # .. 0x3012 ; None
|
||||
0x3013, # .. 0x3013 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||
0x3014, # .. 0x301B ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||
0x301C, # .. 0x301F ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||
0x3020, # .. 0x3029 ; None
|
||||
0x302A, # .. 0x302D ; {'Bopo', 'Hani'}
|
||||
0x302E, # .. 0x302F ; None
|
||||
0x3030, # .. 0x3030 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||
0x3031, # .. 0x3035 ; {'Hira', 'Kana'}
|
||||
0x3036, # .. 0x3036 ; None
|
||||
0x3037, # .. 0x3037 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||
0x3038, # .. 0x303B ; None
|
||||
0x303C, # .. 0x303D ; {'Hani', 'Hira', 'Kana'}
|
||||
0x303E, # .. 0x303F ; {'Hani'}
|
||||
0x3040, # .. 0x3098 ; None
|
||||
0x3099, # .. 0x309C ; {'Hira', 'Kana'}
|
||||
0x309D, # .. 0x309F ; None
|
||||
0x30A0, # .. 0x30A0 ; {'Hira', 'Kana'}
|
||||
0x30A1, # .. 0x30FA ; None
|
||||
0x30FB, # .. 0x30FB ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||
0x30FC, # .. 0x30FC ; {'Hira', 'Kana'}
|
||||
0x30FD, # .. 0x318F ; None
|
||||
0x3190, # .. 0x319F ; {'Hani'}
|
||||
0x31A0, # .. 0x31BF ; None
|
||||
0x31C0, # .. 0x31E5 ; {'Hani'}
|
||||
0x31E6, # .. 0x31EE ; None
|
||||
0x31EF, # .. 0x31EF ; {'Hani', 'Tang'}
|
||||
0x31F0, # .. 0x321F ; None
|
||||
0x3220, # .. 0x3247 ; {'Hani'}
|
||||
0x3248, # .. 0x327F ; None
|
||||
0x3280, # .. 0x32B0 ; {'Hani'}
|
||||
0x32B1, # .. 0x32BF ; None
|
||||
0x32C0, # .. 0x32CB ; {'Hani'}
|
||||
0x32CC, # .. 0x32FE ; None
|
||||
0x32FF, # .. 0x32FF ; {'Hani'}
|
||||
0x3300, # .. 0x3357 ; None
|
||||
0x3358, # .. 0x3370 ; {'Hani'}
|
||||
0x3371, # .. 0x337A ; None
|
||||
0x337B, # .. 0x337F ; {'Hani'}
|
||||
0x3380, # .. 0x33DF ; None
|
||||
0x33E0, # .. 0x33FE ; {'Hani'}
|
||||
0x33FF, # .. 0xA66E ; None
|
||||
0xA66F, # .. 0xA66F ; {'Cyrl', 'Glag'}
|
||||
0xA670, # .. 0xA6FF ; None
|
||||
0xA700, # .. 0xA707 ; {'Hani', 'Latn'}
|
||||
0xA708, # .. 0xA82F ; None
|
||||
0xA830, # .. 0xA832 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Mlym', 'Modi', 'Nand', 'Shrd', 'Sind', 'Takr', 'Tirh', 'Tutg'}
|
||||
0xA833, # .. 0xA835 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Modi', 'Nand', 'Shrd', 'Sind', 'Takr', 'Tirh', 'Tutg'}
|
||||
0xA836, # .. 0xA837 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}
|
||||
0xA838, # .. 0xA838 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Shrd', 'Sind', 'Takr', 'Tirh'}
|
||||
0xA839, # .. 0xA839 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}
|
||||
0xA83A, # .. 0xA8F0 ; None
|
||||
0xA8F1, # .. 0xA8F1 ; {'Beng', 'Deva', 'Tutg'}
|
||||
0xA8F2, # .. 0xA8F2 ; None
|
||||
0xA8F3, # .. 0xA8F3 ; {'Deva', 'Taml'}
|
||||
0xA8F4, # .. 0xA92D ; None
|
||||
0xA92E, # .. 0xA92E ; {'Kali', 'Latn', 'Mymr'}
|
||||
0xA92F, # .. 0xA9CE ; None
|
||||
0xA9CF, # .. 0xA9CF ; {'Bugi', 'Java'}
|
||||
0xA9D0, # .. 0xFD3D ; None
|
||||
0xFD3E, # .. 0xFD3F ; {'Arab', 'Nkoo'}
|
||||
0xFD40, # .. 0xFDF1 ; None
|
||||
0xFDF2, # .. 0xFDF2 ; {'Arab', 'Thaa'}
|
||||
0xFDF3, # .. 0xFDFC ; None
|
||||
0xFDFD, # .. 0xFDFD ; {'Arab', 'Thaa'}
|
||||
0xFDFE, # .. 0xFE44 ; None
|
||||
0xFE45, # .. 0xFE46 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana'}
|
||||
0xFE47, # .. 0xFF60 ; None
|
||||
0xFF61, # .. 0xFF65 ; {'Bopo', 'Hang', 'Hani', 'Hira', 'Kana', 'Yiii'}
|
||||
0xFF66, # .. 0xFF6F ; None
|
||||
0xFF70, # .. 0xFF70 ; {'Hira', 'Kana'}
|
||||
0xFF71, # .. 0xFF9D ; None
|
||||
0xFF9E, # .. 0xFF9F ; {'Hira', 'Kana'}
|
||||
0xFFA0, # .. 0x100FF ; None
|
||||
0x10100, # .. 0x10101 ; {'Cpmn', 'Cprt', 'Linb'}
|
||||
0x10102, # .. 0x10102 ; {'Cprt', 'Linb'}
|
||||
0x10103, # .. 0x10106 ; None
|
||||
0x10107, # .. 0x10133 ; {'Cprt', 'Lina', 'Linb'}
|
||||
0x10134, # .. 0x10136 ; None
|
||||
0x10137, # .. 0x1013F ; {'Cprt', 'Linb'}
|
||||
0x10140, # .. 0x102DF ; None
|
||||
0x102E0, # .. 0x102FB ; {'Arab', 'Copt'}
|
||||
0x102FC, # .. 0x10AF1 ; None
|
||||
0x10AF2, # .. 0x10AF2 ; {'Mani', 'Ougr'}
|
||||
0x10AF3, # .. 0x11300 ; None
|
||||
0x11301, # .. 0x11301 ; {'Gran', 'Taml'}
|
||||
0x11302, # .. 0x11302 ; None
|
||||
0x11303, # .. 0x11303 ; {'Gran', 'Taml'}
|
||||
0x11304, # .. 0x1133A ; None
|
||||
0x1133B, # .. 0x1133C ; {'Gran', 'Taml'}
|
||||
0x1133D, # .. 0x11FCF ; None
|
||||
0x11FD0, # .. 0x11FD1 ; {'Gran', 'Taml'}
|
||||
0x11FD2, # .. 0x11FD2 ; None
|
||||
0x11FD3, # .. 0x11FD3 ; {'Gran', 'Taml'}
|
||||
0x11FD4, # .. 0x1BC9F ; None
|
||||
0x1BCA0, # .. 0x1BCA3 ; {'Dupl'}
|
||||
0x1BCA4, # .. 0x1D35F ; None
|
||||
0x1D360, # .. 0x1D371 ; {'Hani'}
|
||||
0x1D372, # .. 0x1F24F ; None
|
||||
0x1F250, # .. 0x1F251 ; {'Hani'}
|
||||
0x1F252, # .. 0x10FFFF ; None
|
||||
]
|
||||
|
||||
VALUES = [
|
||||
None, # 0000..02BB
|
||||
{"Beng", "Cyrl", "Deva", "Latn", "Lisu", "Thai", "Toto"}, # 02BC..02BC
|
||||
None, # 02BD..02C6
|
||||
{"Bopo", "Latn"}, # 02C7..02C7
|
||||
None, # 02C8..02C8
|
||||
{"Bopo", "Latn"}, # 02C9..02CB
|
||||
None, # 02CC..02CC
|
||||
{"Latn", "Lisu"}, # 02CD..02CD
|
||||
None, # 02CE..02D6
|
||||
{"Latn", "Thai"}, # 02D7..02D7
|
||||
None, # 02D8..02D8
|
||||
{"Bopo", "Latn"}, # 02D9..02D9
|
||||
None, # 02DA..02FF
|
||||
{"Cher", "Copt", "Cyrl", "Grek", "Latn", "Perm", "Sunu", "Tale"}, # 0300..0300
|
||||
{"Cher", "Cyrl", "Grek", "Latn", "Osge", "Sunu", "Tale", "Todr"}, # 0301..0301
|
||||
{"Cher", "Cyrl", "Latn", "Tfng"}, # 0302..0302
|
||||
{"Glag", "Latn", "Sunu", "Syrc", "Thai"}, # 0303..0303
|
||||
{
|
||||
"Aghb",
|
||||
"Cher",
|
||||
"Copt",
|
||||
"Cyrl",
|
||||
"Goth",
|
||||
"Grek",
|
||||
"Latn",
|
||||
"Osge",
|
||||
"Syrc",
|
||||
"Tfng",
|
||||
"Todr",
|
||||
}, # 0304..0304
|
||||
{"Copt", "Elba", "Glag", "Goth", "Kana", "Latn"}, # 0305..0305
|
||||
{"Cyrl", "Grek", "Latn", "Perm"}, # 0306..0306
|
||||
{
|
||||
"Copt",
|
||||
"Dupl",
|
||||
"Hebr",
|
||||
"Latn",
|
||||
"Perm",
|
||||
"Syrc",
|
||||
"Tale",
|
||||
"Tfng",
|
||||
"Todr",
|
||||
}, # 0307..0307
|
||||
{
|
||||
"Armn",
|
||||
"Cyrl",
|
||||
"Dupl",
|
||||
"Goth",
|
||||
"Grek",
|
||||
"Hebr",
|
||||
"Latn",
|
||||
"Perm",
|
||||
"Syrc",
|
||||
"Tale",
|
||||
}, # 0308..0308
|
||||
{"Latn", "Tfng"}, # 0309..0309
|
||||
{"Dupl", "Latn", "Syrc"}, # 030A..030A
|
||||
{"Cher", "Cyrl", "Latn", "Osge"}, # 030B..030B
|
||||
{"Cher", "Latn", "Tale"}, # 030C..030C
|
||||
{"Latn", "Sunu"}, # 030D..030D
|
||||
{"Ethi", "Latn"}, # 030E..030E
|
||||
None, # 030F..030F
|
||||
{"Latn", "Sunu"}, # 0310..0310
|
||||
{"Cyrl", "Latn", "Todr"}, # 0311..0311
|
||||
None, # 0312..0312
|
||||
{"Grek", "Latn", "Perm", "Todr"}, # 0313..0313
|
||||
None, # 0314..031F
|
||||
{"Latn", "Syrc"}, # 0320..0320
|
||||
None, # 0321..0322
|
||||
{"Cher", "Dupl", "Kana", "Latn", "Syrc"}, # 0323..0323
|
||||
{"Cher", "Dupl", "Latn", "Syrc"}, # 0324..0324
|
||||
{"Latn", "Syrc"}, # 0325..0325
|
||||
None, # 0326..032C
|
||||
{"Latn", "Sunu", "Syrc"}, # 032D..032D
|
||||
{"Latn", "Syrc"}, # 032E..032E
|
||||
None, # 032F..032F
|
||||
{"Cher", "Latn", "Syrc"}, # 0330..0330
|
||||
{"Aghb", "Cher", "Goth", "Latn", "Sunu", "Thai"}, # 0331..0331
|
||||
None, # 0332..0341
|
||||
{"Grek"}, # 0342..0342
|
||||
None, # 0343..0344
|
||||
{"Grek"}, # 0345..0345
|
||||
None, # 0346..0357
|
||||
{"Latn", "Osge"}, # 0358..0358
|
||||
None, # 0359..035D
|
||||
{"Aghb", "Latn", "Todr"}, # 035E..035E
|
||||
None, # 035F..0362
|
||||
{"Latn"}, # 0363..036F
|
||||
None, # 0370..0373
|
||||
{"Copt", "Grek"}, # 0374..0375
|
||||
None, # 0376..0482
|
||||
{"Cyrl", "Perm"}, # 0483..0483
|
||||
{"Cyrl", "Glag"}, # 0484..0484
|
||||
{"Cyrl", "Latn"}, # 0485..0486
|
||||
{"Cyrl", "Glag"}, # 0487..0487
|
||||
None, # 0488..0588
|
||||
{"Armn", "Geor", "Glag"}, # 0589..0589
|
||||
None, # 058A..060B
|
||||
{"Arab", "Gara", "Nkoo", "Rohg", "Syrc", "Thaa", "Yezi"}, # 060C..060C
|
||||
None, # 060D..061A
|
||||
{"Arab", "Gara", "Nkoo", "Rohg", "Syrc", "Thaa", "Yezi"}, # 061B..061B
|
||||
{"Arab", "Syrc", "Thaa"}, # 061C..061C
|
||||
None, # 061D..061E
|
||||
{"Adlm", "Arab", "Gara", "Nkoo", "Rohg", "Syrc", "Thaa", "Yezi"}, # 061F..061F
|
||||
None, # 0620..063F
|
||||
{
|
||||
"Adlm",
|
||||
"Arab",
|
||||
"Mand",
|
||||
"Mani",
|
||||
"Ougr",
|
||||
"Phlp",
|
||||
"Rohg",
|
||||
"Sogd",
|
||||
"Syrc",
|
||||
}, # 0640..0640
|
||||
None, # 0641..064A
|
||||
{"Arab", "Syrc"}, # 064B..0655
|
||||
None, # 0656..065F
|
||||
{"Arab", "Thaa", "Yezi"}, # 0660..0669
|
||||
None, # 066A..066F
|
||||
{"Arab", "Syrc"}, # 0670..0670
|
||||
None, # 0671..06D3
|
||||
{"Arab", "Rohg"}, # 06D4..06D4
|
||||
None, # 06D5..0950
|
||||
{
|
||||
"Beng",
|
||||
"Deva",
|
||||
"Gran",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Knda",
|
||||
"Latn",
|
||||
"Mlym",
|
||||
"Orya",
|
||||
"Shrd",
|
||||
"Taml",
|
||||
"Telu",
|
||||
"Tirh",
|
||||
}, # 0951..0951
|
||||
{
|
||||
"Beng",
|
||||
"Deva",
|
||||
"Gran",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Knda",
|
||||
"Latn",
|
||||
"Mlym",
|
||||
"Orya",
|
||||
"Taml",
|
||||
"Telu",
|
||||
"Tirh",
|
||||
}, # 0952..0952
|
||||
None, # 0953..0963
|
||||
{
|
||||
"Beng",
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gong",
|
||||
"Gonm",
|
||||
"Gran",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Knda",
|
||||
"Mahj",
|
||||
"Mlym",
|
||||
"Nand",
|
||||
"Onao",
|
||||
"Orya",
|
||||
"Sind",
|
||||
"Sinh",
|
||||
"Sylo",
|
||||
"Takr",
|
||||
"Taml",
|
||||
"Telu",
|
||||
"Tirh",
|
||||
}, # 0964..0964
|
||||
{
|
||||
"Beng",
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gong",
|
||||
"Gonm",
|
||||
"Gran",
|
||||
"Gujr",
|
||||
"Gukh",
|
||||
"Guru",
|
||||
"Knda",
|
||||
"Limb",
|
||||
"Mahj",
|
||||
"Mlym",
|
||||
"Nand",
|
||||
"Onao",
|
||||
"Orya",
|
||||
"Sind",
|
||||
"Sinh",
|
||||
"Sylo",
|
||||
"Takr",
|
||||
"Taml",
|
||||
"Telu",
|
||||
"Tirh",
|
||||
}, # 0965..0965
|
||||
{"Deva", "Dogr", "Kthi", "Mahj"}, # 0966..096F
|
||||
None, # 0970..09E5
|
||||
{"Beng", "Cakm", "Sylo"}, # 09E6..09EF
|
||||
None, # 09F0..0A65
|
||||
{"Guru", "Mult"}, # 0A66..0A6F
|
||||
None, # 0A70..0AE5
|
||||
{"Gujr", "Khoj"}, # 0AE6..0AEF
|
||||
None, # 0AF0..0BE5
|
||||
{"Gran", "Taml"}, # 0BE6..0BF3
|
||||
None, # 0BF4..0CE5
|
||||
{"Knda", "Nand", "Tutg"}, # 0CE6..0CEF
|
||||
None, # 0CF0..103F
|
||||
{"Cakm", "Mymr", "Tale"}, # 1040..1049
|
||||
None, # 104A..10FA
|
||||
{"Geor", "Glag", "Latn"}, # 10FB..10FB
|
||||
None, # 10FC..16EA
|
||||
{"Runr"}, # 16EB..16ED
|
||||
None, # 16EE..1734
|
||||
{"Buhd", "Hano", "Tagb", "Tglg"}, # 1735..1736
|
||||
None, # 1737..1801
|
||||
{"Mong", "Phag"}, # 1802..1803
|
||||
None, # 1804..1804
|
||||
{"Mong", "Phag"}, # 1805..1805
|
||||
None, # 1806..1CCF
|
||||
{"Beng", "Deva", "Gran", "Knda"}, # 1CD0..1CD0
|
||||
{"Deva"}, # 1CD1..1CD1
|
||||
{"Beng", "Deva", "Gran", "Knda"}, # 1CD2..1CD2
|
||||
{"Deva", "Gran", "Knda"}, # 1CD3..1CD3
|
||||
{"Deva"}, # 1CD4..1CD4
|
||||
{"Beng", "Deva"}, # 1CD5..1CD6
|
||||
{"Deva", "Shrd"}, # 1CD7..1CD7
|
||||
{"Beng", "Deva"}, # 1CD8..1CD8
|
||||
{"Deva", "Shrd"}, # 1CD9..1CD9
|
||||
{"Deva", "Knda", "Mlym", "Orya", "Taml", "Telu"}, # 1CDA..1CDA
|
||||
{"Deva"}, # 1CDB..1CDB
|
||||
{"Deva", "Shrd"}, # 1CDC..1CDD
|
||||
{"Deva"}, # 1CDE..1CDF
|
||||
{"Deva", "Shrd"}, # 1CE0..1CE0
|
||||
{"Beng", "Deva"}, # 1CE1..1CE1
|
||||
{"Deva"}, # 1CE2..1CE8
|
||||
{"Deva", "Nand"}, # 1CE9..1CE9
|
||||
{"Beng", "Deva"}, # 1CEA..1CEA
|
||||
{"Deva"}, # 1CEB..1CEC
|
||||
{"Beng", "Deva"}, # 1CED..1CED
|
||||
{"Deva"}, # 1CEE..1CF1
|
||||
{
|
||||
"Beng",
|
||||
"Deva",
|
||||
"Gran",
|
||||
"Knda",
|
||||
"Mlym",
|
||||
"Nand",
|
||||
"Orya",
|
||||
"Sinh",
|
||||
"Telu",
|
||||
"Tirh",
|
||||
"Tutg",
|
||||
}, # 1CF2..1CF2
|
||||
{"Deva", "Gran"}, # 1CF3..1CF3
|
||||
{"Deva", "Gran", "Knda", "Tutg"}, # 1CF4..1CF4
|
||||
{"Beng", "Deva"}, # 1CF5..1CF6
|
||||
{"Beng"}, # 1CF7..1CF7
|
||||
{"Deva", "Gran"}, # 1CF8..1CF9
|
||||
{"Nand"}, # 1CFA..1CFA
|
||||
None, # 1CFB..1DBF
|
||||
{"Grek"}, # 1DC0..1DC1
|
||||
None, # 1DC2..1DF7
|
||||
{"Cyrl", "Latn", "Syrc"}, # 1DF8..1DF8
|
||||
None, # 1DF9..1DF9
|
||||
{"Syrc"}, # 1DFA..1DFA
|
||||
None, # 1DFB..202E
|
||||
{"Latn", "Mong", "Phag"}, # 202F..202F
|
||||
None, # 2030..204E
|
||||
{"Adlm", "Arab"}, # 204F..204F
|
||||
None, # 2050..2059
|
||||
{"Cari", "Geor", "Glag", "Hung", "Lyci", "Orkh"}, # 205A..205A
|
||||
None, # 205B..205C
|
||||
{"Cari", "Grek", "Hung", "Mero"}, # 205D..205D
|
||||
None, # 205E..20EF
|
||||
{"Deva", "Gran", "Latn"}, # 20F0..20F0
|
||||
None, # 20F1..2E16
|
||||
{"Copt", "Latn"}, # 2E17..2E17
|
||||
None, # 2E18..2E2F
|
||||
{"Avst", "Orkh"}, # 2E30..2E30
|
||||
{"Avst", "Cari", "Geor", "Hung", "Kthi", "Lydi", "Samr"}, # 2E31..2E31
|
||||
None, # 2E32..2E3B
|
||||
{"Dupl"}, # 2E3C..2E3C
|
||||
None, # 2E3D..2E40
|
||||
{"Adlm", "Arab", "Hung"}, # 2E41..2E41
|
||||
None, # 2E42..2E42
|
||||
{"Cyrl", "Glag"}, # 2E43..2E43
|
||||
None, # 2E44..2FEF
|
||||
{"Hani", "Tang"}, # 2FF0..2FFF
|
||||
None, # 3000..3000
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Mong", "Yiii"}, # 3001..3001
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Mong", "Phag", "Yiii"}, # 3002..3002
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3003..3003
|
||||
None, # 3004..3005
|
||||
{"Hani"}, # 3006..3006
|
||||
None, # 3007..3007
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Mong", "Tibt", "Yiii"}, # 3008..3009
|
||||
{
|
||||
"Bopo",
|
||||
"Hang",
|
||||
"Hani",
|
||||
"Hira",
|
||||
"Kana",
|
||||
"Lisu",
|
||||
"Mong",
|
||||
"Tibt",
|
||||
"Yiii",
|
||||
}, # 300A..300B
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # 300C..3011
|
||||
None, # 3012..3012
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3013..3013
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # 3014..301B
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 301C..301F
|
||||
None, # 3020..3029
|
||||
{"Bopo", "Hani"}, # 302A..302D
|
||||
None, # 302E..302F
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3030..3030
|
||||
{"Hira", "Kana"}, # 3031..3035
|
||||
None, # 3036..3036
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # 3037..3037
|
||||
None, # 3038..303B
|
||||
{"Hani", "Hira", "Kana"}, # 303C..303D
|
||||
{"Hani"}, # 303E..303F
|
||||
None, # 3040..3098
|
||||
{"Hira", "Kana"}, # 3099..309C
|
||||
None, # 309D..309F
|
||||
{"Hira", "Kana"}, # 30A0..30A0
|
||||
None, # 30A1..30FA
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # 30FB..30FB
|
||||
{"Hira", "Kana"}, # 30FC..30FC
|
||||
None, # 30FD..318F
|
||||
{"Hani"}, # 3190..319F
|
||||
None, # 31A0..31BF
|
||||
{"Hani"}, # 31C0..31E5
|
||||
None, # 31E6..31EE
|
||||
{"Hani", "Tang"}, # 31EF..31EF
|
||||
None, # 31F0..321F
|
||||
{"Hani"}, # 3220..3247
|
||||
None, # 3248..327F
|
||||
{"Hani"}, # 3280..32B0
|
||||
None, # 32B1..32BF
|
||||
{"Hani"}, # 32C0..32CB
|
||||
None, # 32CC..32FE
|
||||
{"Hani"}, # 32FF..32FF
|
||||
None, # 3300..3357
|
||||
{"Hani"}, # 3358..3370
|
||||
None, # 3371..337A
|
||||
{"Hani"}, # 337B..337F
|
||||
None, # 3380..33DF
|
||||
{"Hani"}, # 33E0..33FE
|
||||
None, # 33FF..A66E
|
||||
{"Cyrl", "Glag"}, # A66F..A66F
|
||||
None, # A670..A6FF
|
||||
{"Hani", "Latn"}, # A700..A707
|
||||
None, # A708..A82F
|
||||
{
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Khoj",
|
||||
"Knda",
|
||||
"Kthi",
|
||||
"Mahj",
|
||||
"Mlym",
|
||||
"Modi",
|
||||
"Nand",
|
||||
"Shrd",
|
||||
"Sind",
|
||||
"Takr",
|
||||
"Tirh",
|
||||
"Tutg",
|
||||
}, # A830..A832
|
||||
{
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Khoj",
|
||||
"Knda",
|
||||
"Kthi",
|
||||
"Mahj",
|
||||
"Modi",
|
||||
"Nand",
|
||||
"Shrd",
|
||||
"Sind",
|
||||
"Takr",
|
||||
"Tirh",
|
||||
"Tutg",
|
||||
}, # A833..A835
|
||||
{
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Khoj",
|
||||
"Kthi",
|
||||
"Mahj",
|
||||
"Modi",
|
||||
"Sind",
|
||||
"Takr",
|
||||
"Tirh",
|
||||
}, # A836..A837
|
||||
{
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Khoj",
|
||||
"Kthi",
|
||||
"Mahj",
|
||||
"Modi",
|
||||
"Shrd",
|
||||
"Sind",
|
||||
"Takr",
|
||||
"Tirh",
|
||||
}, # A838..A838
|
||||
{
|
||||
"Deva",
|
||||
"Dogr",
|
||||
"Gujr",
|
||||
"Guru",
|
||||
"Khoj",
|
||||
"Kthi",
|
||||
"Mahj",
|
||||
"Modi",
|
||||
"Sind",
|
||||
"Takr",
|
||||
"Tirh",
|
||||
}, # A839..A839
|
||||
None, # A83A..A8F0
|
||||
{"Beng", "Deva", "Tutg"}, # A8F1..A8F1
|
||||
None, # A8F2..A8F2
|
||||
{"Deva", "Taml"}, # A8F3..A8F3
|
||||
None, # A8F4..A92D
|
||||
{"Kali", "Latn", "Mymr"}, # A92E..A92E
|
||||
None, # A92F..A9CE
|
||||
{"Bugi", "Java"}, # A9CF..A9CF
|
||||
None, # A9D0..FD3D
|
||||
{"Arab", "Nkoo"}, # FD3E..FD3F
|
||||
None, # FD40..FDF1
|
||||
{"Arab", "Thaa"}, # FDF2..FDF2
|
||||
None, # FDF3..FDFC
|
||||
{"Arab", "Thaa"}, # FDFD..FDFD
|
||||
None, # FDFE..FE44
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana"}, # FE45..FE46
|
||||
None, # FE47..FF60
|
||||
{"Bopo", "Hang", "Hani", "Hira", "Kana", "Yiii"}, # FF61..FF65
|
||||
None, # FF66..FF6F
|
||||
{"Hira", "Kana"}, # FF70..FF70
|
||||
None, # FF71..FF9D
|
||||
{"Hira", "Kana"}, # FF9E..FF9F
|
||||
None, # FFA0..100FF
|
||||
{"Cpmn", "Cprt", "Linb"}, # 10100..10101
|
||||
{"Cprt", "Linb"}, # 10102..10102
|
||||
None, # 10103..10106
|
||||
{"Cprt", "Lina", "Linb"}, # 10107..10133
|
||||
None, # 10134..10136
|
||||
{"Cprt", "Linb"}, # 10137..1013F
|
||||
None, # 10140..102DF
|
||||
{"Arab", "Copt"}, # 102E0..102FB
|
||||
None, # 102FC..10AF1
|
||||
{"Mani", "Ougr"}, # 10AF2..10AF2
|
||||
None, # 10AF3..11300
|
||||
{"Gran", "Taml"}, # 11301..11301
|
||||
None, # 11302..11302
|
||||
{"Gran", "Taml"}, # 11303..11303
|
||||
None, # 11304..1133A
|
||||
{"Gran", "Taml"}, # 1133B..1133C
|
||||
None, # 1133D..11FCF
|
||||
{"Gran", "Taml"}, # 11FD0..11FD1
|
||||
None, # 11FD2..11FD2
|
||||
{"Gran", "Taml"}, # 11FD3..11FD3
|
||||
None, # 11FD4..1BC9F
|
||||
{"Dupl"}, # 1BCA0..1BCA3
|
||||
None, # 1BCA4..1D35F
|
||||
{"Hani"}, # 1D360..1D371
|
||||
None, # 1D372..1F24F
|
||||
{"Hani"}, # 1F250..1F251
|
||||
None, # 1F252..10FFFF
|
||||
]
|
||||
3618
venv/lib/python3.12/site-packages/fontTools/unicodedata/Scripts.py
Normal file
3618
venv/lib/python3.12/site-packages/fontTools/unicodedata/Scripts.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,298 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from fontTools.misc.textTools import byteord, tostr
|
||||
|
||||
import re
|
||||
from bisect import bisect_right
|
||||
from typing import Literal, TypeVar, overload
|
||||
|
||||
|
||||
try:
|
||||
# use unicodedata backport compatible with python2:
|
||||
# https://github.com/fonttools/unicodedata2
|
||||
from unicodedata2 import *
|
||||
except ImportError: # pragma: no cover
|
||||
# fall back to built-in unicodedata (possibly outdated)
|
||||
from unicodedata import *
|
||||
|
||||
from . import Blocks, Scripts, ScriptExtensions, OTTags
|
||||
|
||||
|
||||
__all__ = [
|
||||
# names from built-in unicodedata module
|
||||
"lookup",
|
||||
"name",
|
||||
"decimal",
|
||||
"digit",
|
||||
"numeric",
|
||||
"category",
|
||||
"bidirectional",
|
||||
"combining",
|
||||
"east_asian_width",
|
||||
"mirrored",
|
||||
"decomposition",
|
||||
"normalize",
|
||||
"unidata_version",
|
||||
"ucd_3_2_0",
|
||||
# additional functions
|
||||
"block",
|
||||
"script",
|
||||
"script_extension",
|
||||
"script_name",
|
||||
"script_code",
|
||||
"script_horizontal_direction",
|
||||
"ot_tags_from_script",
|
||||
"ot_tag_to_script",
|
||||
]
|
||||
|
||||
|
||||
def script(char):
    """Look up the Unicode Script property of the character 'char'.

    The result is the four-letter script code, as a string.

    >>> script("a")
    'Latn'
    >>> script(",")
    'Zyyy'
    >>> script(chr(0x10FFFF))
    'Zzzz'
    """
    codepoint = byteord(char)
    # Scripts.RANGES is a sorted list holding only the first code point of
    # each range. bisect_right() returns the insertion point that comes after
    # every entry <= codepoint, so the range that actually contains the code
    # point (the last one whose start is <= codepoint) is one slot to the
    # left of that insertion point.
    insertion_point = bisect_right(Scripts.RANGES, codepoint)
    return Scripts.VALUES[insertion_point - 1]
|
||||
|
||||
|
||||
def script_extension(char):
    """Look up the Script_Extensions property of the character 'char'.

    The result is a set of four-letter script codes (strings).

    >>> script_extension("a") == {'Latn'}
    True
    >>> script_extension(chr(0x060C)) == {'Nkoo', 'Arab', 'Rohg', 'Thaa', 'Syrc', 'Gara', 'Yezi'}
    True
    >>> script_extension(chr(0x10FFFF)) == {'Zzzz'}
    True
    """
    # Same "sorted range starts" lookup as in script(): the entry just left
    # of the insertion point is the range containing the code point.
    insertion_point = bisect_right(ScriptExtensions.RANGES, byteord(char))
    extensions = ScriptExtensions.VALUES[insertion_point - 1]
    if extensions is not None:
        return extensions
    # A None entry marks code points that are not explicitly listed for
    # Script Extensions; their value is the corresponding Script property.
    return {script(char)}
|
||||
|
||||
|
||||
def script_name(code, default=KeyError):
    """Translate a four-letter Unicode script code into its long name.

    Underscores in the stored name are replaced by spaces to make it
    human-readable.

    When the code is unknown, a KeyError is raised by default. Pass any
    other value as 'default' (e.g. 'Unknown' or None) to get that value
    back instead of an exception.
    """
    try:
        long_name = Scripts.NAMES[code]
    except KeyError:
        # 'default' being KeyError (or a subclass of it) means "let the
        # lookup error propagate"; anything else is the fallback value.
        wants_error = isinstance(default, type) and issubclass(default, KeyError)
        if not wants_error:
            return default
        raise
    return str(long_name.replace("_", " "))
|
||||
|
||||
|
||||
# Matches any run of hyphens, underscores and spaces.
_normalize_re = re.compile(r"[-_ ]+")


def _normalize_property_name(string):
    """Fold a property name for loose matching.

    All spaces, '-' and '_' are removed and the remainder is lower-cased.
    """
    without_separators = _normalize_re.sub("", string)
    return without_separators.lower()
|
||||
|
||||
|
||||
# Reverse lookup table: loosely-normalized long script name -> script code.
_SCRIPT_CODES = {
    _normalize_property_name(long_name): short_code
    for short_code, long_name in Scripts.NAMES.items()
}


def script_code(script_name, default=KeyError):
    """Translate a long script name into its four-letter Unicode script code.

    The name is matched loosely: case, spaces, '-' and '_' are ignored.

    When no script code matches, a KeyError is raised by default. Pass any
    other value as 'default' (e.g. 'Zzzz' or None) to get that value back
    instead of an exception.
    """
    lookup_key = _normalize_property_name(script_name)
    try:
        found = _SCRIPT_CODES[lookup_key]
    except KeyError:
        # 'default' being KeyError (or a subclass of it) means "let the
        # lookup error propagate"; anything else is the fallback value.
        wants_error = isinstance(default, type) and issubclass(default, KeyError)
        if not wants_error:
            return default
        raise
    return found
|
||||
|
||||
|
||||
# Script codes whose horizontal direction is right-to-left; every other known
# script is treated as left-to-right by script_horizontal_direction().
#
# The data on script direction is taken from Harfbuzz source code:
# https://github.com/harfbuzz/harfbuzz/blob/3.2.0/src/hb-common.cc#L514-L613
# This in turn references the following "Script_Metadata" document:
# https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o
#
# NOTE: the HarfBuzz snapshot linked above predates Unicode 16.0, so the
# Unicode-16.0 entry below was added on top of it to match the Unicode 16.0
# data shipped with this package.
RTL_SCRIPTS = {
    # Unicode-1.1 additions
    "Arab",  # Arabic
    "Hebr",  # Hebrew
    # Unicode-3.0 additions
    "Syrc",  # Syriac
    "Thaa",  # Thaana
    # Unicode-4.0 additions
    "Cprt",  # Cypriot
    # Unicode-4.1 additions
    "Khar",  # Kharoshthi
    # Unicode-5.0 additions
    "Phnx",  # Phoenician
    "Nkoo",  # Nko
    # Unicode-5.1 additions
    "Lydi",  # Lydian
    # Unicode-5.2 additions
    "Avst",  # Avestan
    "Armi",  # Imperial Aramaic
    "Phli",  # Inscriptional Pahlavi
    "Prti",  # Inscriptional Parthian
    "Sarb",  # Old South Arabian
    "Orkh",  # Old Turkic
    "Samr",  # Samaritan
    # Unicode-6.0 additions
    "Mand",  # Mandaic
    # Unicode-6.1 additions
    "Merc",  # Meroitic Cursive
    "Mero",  # Meroitic Hieroglyphs
    # Unicode-7.0 additions
    "Mani",  # Manichaean
    "Mend",  # Mende Kikakui
    "Nbat",  # Nabataean
    "Narb",  # Old North Arabian
    "Palm",  # Palmyrene
    "Phlp",  # Psalter Pahlavi
    # Unicode-8.0 additions
    "Hatr",  # Hatran
    "Hung",  # Old Hungarian
    # Unicode-9.0 additions
    "Adlm",  # Adlam
    # Unicode-11.0 additions
    "Rohg",  # Hanifi Rohingya
    "Sogo",  # Old Sogdian
    "Sogd",  # Sogdian
    # Unicode-12.0 additions
    "Elym",  # Elymaic
    # Unicode-13.0 additions
    "Chrs",  # Chorasmian
    "Yezi",  # Yezidi
    # Unicode-14.0 additions
    "Ougr",  # Old Uyghur
    # Unicode-16.0 additions
    "Gara",  # Garay
}
|
||||
|
||||
|
||||
# The two values script_horizontal_direction() can return for a known script.
HorizDirection = Literal["RTL", "LTR"]
# Type of the caller-supplied fallback returned for unknown script codes.
T = TypeVar("T")


@overload
def script_horizontal_direction(script_code: str, default: T) -> HorizDirection | T: ...


@overload
def script_horizontal_direction(
    script_code: str, default: type[KeyError] = KeyError
) -> HorizDirection: ...


def script_horizontal_direction(
    script_code: str, default: T | type[KeyError] = KeyError
) -> HorizDirection | T:
    """Tell whether a script is written right-to-left or left-to-right.

    Returns "RTL" for scripts that contain right-to-left characters
    according to the Bidi_Class property (those listed in RTL_SCRIPTS),
    and "LTR" for every other known script.

    For a script code that is not in Scripts.NAMES, 'default' decides the
    outcome: if it is KeyError (or a subclass), it is raised with the
    offending code; any other value is returned as is.
    """
    if script_code in Scripts.NAMES:
        return "RTL" if script_code in RTL_SCRIPTS else "LTR"

    # Unknown script code: either raise the requested error type or hand
    # back the caller's fallback value.
    wants_error = isinstance(default, type) and issubclass(default, KeyError)
    if wants_error:
        raise default(script_code)
    return default
|
||||
|
||||
|
||||
def block(char):
    """Look up the Unicode Block property of the character 'char'.

    The result is the block name, as a string.

    >>> block("a")
    'Basic Latin'
    >>> block(chr(0x060C))
    'Arabic'
    >>> block(chr(0xEFFFF))
    'No_Block'
    """
    # Blocks.RANGES is searched like the other range tables in this module:
    # the entry just left of bisect_right()'s insertion point is the range
    # whose start is <= the code point.
    insertion_point = bisect_right(Blocks.RANGES, byteord(char))
    return Blocks.VALUES[insertion_point - 1]
|
||||
|
||||
|
||||
def ot_tags_from_script(script_code):
    """List the OpenType script tags associated with a Unicode script code.

    Invalid or unknown script codes yield the default ['DFLT'] script tag.
    When a script has additional "new" tags, the result is ordered with the
    most recently listed tag first and the algorithmically derived tag last.
    """
    # Scripts whose OpenType tag cannot be derived from the script code.
    if script_code in OTTags.SCRIPT_EXCEPTIONS:
        exception_tag = OTTags.SCRIPT_EXCEPTIONS[script_code]
        return [exception_tag]

    if script_code not in Scripts.NAMES:
        return [OTTags.DEFAULT_SCRIPT]

    # Regular case: the tag is the script code with a lower-case first letter.
    derived_tag = script_code[0].lower() + script_code[1:]
    if script_code not in OTTags.NEW_SCRIPT_TAGS:
        return [derived_tag]

    # Last in, first out: the newer tags go in front of the derived one.
    oldest_first = [derived_tag, *OTTags.NEW_SCRIPT_TAGS[script_code]]
    return oldest_first[::-1]
|
||||
|
||||
|
||||
def ot_tag_to_script(tag):
    """Return the Unicode script code for the given OpenType script tag.

    Returns None for the "DFLT" tag, or if there is no Unicode script
    associated with the tag.

    Raises ValueError if the tag is invalid, i.e. empty, longer than four
    characters, or containing a space once surrounding whitespace has been
    stripped.
    """
    tag = tostr(tag).strip()
    if not tag or " " in tag or len(tag) > 4:
        raise ValueError("invalid OpenType tag: %r" % tag)

    # Aliases are looked up on the stripped tag, before any padding.
    if tag in OTTags.SCRIPT_ALIASES:
        tag = OTTags.SCRIPT_ALIASES[tag]

    # Pad with trailing spaces up to the fixed tag width of four characters.
    tag = tag.ljust(4)

    if tag == OTTags.DEFAULT_SCRIPT:
        # it's unclear which Unicode script the "DFLT" OpenType tag maps to,
        # so here we return None
        return None

    if tag in OTTags.NEW_SCRIPT_TAGS_REVERSED:
        return OTTags.NEW_SCRIPT_TAGS_REVERSED[tag]

    if tag in OTTags.SCRIPT_EXCEPTIONS_REVERSED:
        return OTTags.SCRIPT_EXCEPTIONS_REVERSED[tag]

    # This side of the conversion is fully algorithmic:
    # - the first character is changed to uppercase;
    # - any spaces at the end of the tag are replaced by repeating the last
    #   letter. Eg 'nko ' -> 'Nkoo'.
    script_code = tag[0].upper() + tag[1]
    for i in range(2, 4):
        script_code += script_code[i - 1] if tag[i] == " " else tag[i]

    if script_code not in Scripts.NAMES:
        return None
    return script_code
|
||||
Reference in New Issue
Block a user