diff options
author | Mike FABIAN <mfabian@redhat.com> | 2017-09-06 11:19:33 +0200 |
---|---|---|
committer | Mike FABIAN <mfabian@redhat.com> | 2017-09-06 12:39:49 +0200 |
commit | 2ae5be041d9ea89cdd0f37734d72051e8f773947 (patch) | |
tree | c3d52a8b19d8be09ad89b6bbe6b8f0b462b98a48 /localedata/unicode-gen | |
parent | af83ed5c4647bda196fc1a7efebbe8019aa83f4a (diff) | |
download | glibc-2ae5be041d9ea89cdd0f37734d72051e8f773947.tar glibc-2ae5be041d9ea89cdd0f37734d72051e8f773947.tar.gz glibc-2ae5be041d9ea89cdd0f37734d72051e8f773947.tar.bz2 glibc-2ae5be041d9ea89cdd0f37734d72051e8f773947.zip |
Improve utf8_gen.py to set the width for characters with Prepended_Concatenation_Mark property to 1
[BZ #22070]
* localedata/unicode-gen/utf8_gen.py: Set the width for
characters with Prepended_Concatenation_Mark property to 1
* localedata/charmaps/UTF-8: Updated using the improved script.
Diffstat (limited to 'localedata/unicode-gen')
-rw-r--r-- | localedata/unicode-gen/Makefile | 4 | ||||
-rw-r--r-- | localedata/unicode-gen/PropList.txt | 1618 | ||||
-rwxr-xr-x | localedata/unicode-gen/utf8_gen.py | 33 |
3 files changed, 1648 insertions, 7 deletions
diff --git a/localedata/unicode-gen/Makefile b/localedata/unicode-gen/Makefile index d62603ed3d..4564670451 100644 --- a/localedata/unicode-gen/Makefile +++ b/localedata/unicode-gen/Makefile @@ -40,7 +40,7 @@ UNICODE_VERSION = 10.0.0 PYTHON3 = python3 WGET = wget -DOWNLOADS = UnicodeData.txt DerivedCoreProperties.txt EastAsianWidth.txt +DOWNLOADS = UnicodeData.txt DerivedCoreProperties.txt EastAsianWidth.txt PropList.txt GENERATED = i18n tr_TR UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction REPORTS = i18n-report UTF-8-report @@ -92,7 +92,7 @@ tr_TR: gen_unicode_ctype.py UTF-8: UnicodeData.txt EastAsianWidth.txt UTF-8: utf8_gen.py - $(PYTHON3) utf8_gen.py UnicodeData.txt EastAsianWidth.txt + $(PYTHON3) utf8_gen.py UnicodeData.txt EastAsianWidth.txt PropList.txt UTF-8-report: UTF-8 ../charmaps/UTF-8 UTF-8-report: utf8_compatibility.py diff --git a/localedata/unicode-gen/PropList.txt b/localedata/unicode-gen/PropList.txt new file mode 100644 index 0000000000..9a2d0e4b1c --- /dev/null +++ b/localedata/unicode-gen/PropList.txt @@ -0,0 +1,1618 @@ +# PropList-10.0.0.txt +# Date: 2017-03-10, 08:25:30 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D> +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc <control-0085> +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 25 + +# ================================================ + +061C ; Bidi_Control # Cf ARABIC LETTER MARK +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE + +# Total code points: 12 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF +1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Dash # Pd DOUBLE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 28 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 30 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061F ; Terminal_Punctuation # Po ARABIC QUESTION MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK +0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN +1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK +2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP +2E41 ; Terminal_Punctuation # Po REVERSED COMMA +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK +A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK +A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA +A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI +AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA +1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER +103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER +10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN +1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR +10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS +10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK +111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK +112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA +1145B ; Terminal_Punctuation # Po NEWA PLACEHOLDER MARK +115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR +115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 +11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR +11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD +12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP +16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM +16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON + +# Total code points: 252 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Other_Math # L& EULER CONSTANT +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW +21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +2308 ; Other_Math # Ps LEFT CEILING +2309 ; Other_Math # Pe RIGHT CEILING +230A ; Other_Math # Ps LEFT FLOOR +230B ; Other_Math # Pe RIGHT FLOOR +23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +23E2 ; Other_Math # So WHITE TRAPEZIUM +25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR +2640 ; Other_Math # So FEMALE SIGN +2642 ; Other_Math # So MALE SIGN +2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1362 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA +08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN SGAW KAREN EU +1067..1068 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR VOWEL SIGN WESTERN PWO KAREN UE +1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A +109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH +1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU +A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H +A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET +A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA +AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA +11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA +1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN +112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA +11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA +114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA +115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA +115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA +11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +11A01..11A06 ; Other_Alphabetic # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A07..11A08 ; Other_Alphabetic # Mc [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A09..11A0A ; Other_Alphabetic # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA +11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA +11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA +16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK +1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E947 ; Other_Alphabetic # Mn ADLAM HAMZA +1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1300 + +# ================================================ + +3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FEA ; Ideographic # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +17000..187EC ; Ideographic # Lo [6125] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187EC +18800..18AF2 ; Ideographic # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 +1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 96174 + +# ================================================ + +005E ; Diacritic # Sk CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Diacritic # Lm MODIFIER LETTER VOICING +02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374 ; Diacritic # Lm GREEK NUMERAL SIGN +0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1B34 ; Diacritic # Mn BALINESE SIGN REREKAN +1B44 ; Diacritic # Mc BALINESE ADEG ADEG +1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Diacritic # Mn VEDIC SIGN TIRYAK +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW +1DF5..1DF9 ; Diacritic # Mn [5] COMBINING UP TACK ABOVE..COMBINING WIDE INVERTED BRIDGE BELOW +1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2E2F ; Diacritic # Lm VERTICAL TILDE +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET +A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67F ; Diacritic # Lm CYRILLIC PAYEROK +A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU +A92E ; Diacritic # Po KAYAH LI SIGN CWI +A953 ; Diacritic # Mc REJANG VIRAMA +A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU +A9C0 ; Diacritic # Mc JAVANESE PANGKON +A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW +AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 +AABF ; Diacritic # Mn TAI VIET TONE MAI EK +AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG +AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO +AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA +AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK +ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK +10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK +11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA +11236 ; Diacritic # Mn KHOJKI SIGN NUKTA +112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA +1133C ; Diacritic # Mn GRANTHA SIGN NUKTA +1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA +11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11442 ; Diacritic # Mn NEWA SIGN VIRAMA +11446 ; Diacritic # Mn NEWA SIGN NUKTA +114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +1163F ; Diacritic # Mn MODI SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +1172B ; Diacritic # Mn AHOM SIGN KILLER +11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Diacritic # Mn SOYOMBO SUBJOINER +11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA +11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA +11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA +16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK +1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA + +# Total code points: 798 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +07FA ; Extender # Lm NKO LAJANYALAN +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK +1C36 ; Extender # Mn LEPCHA SIGN RAN +1C7B ; Extender # Lm OL CHIKI RELAA +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +A60C ; Extender # Lm VAI SYLLABLE LENGTHENER +A9CF ; Extender # Lm JAVANESE PANGRANGKEP +A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +1135D ; Extender # Lo GRANTHA SIGN PLUTA +115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +11A98 ; Extender # Mn SOYOMBO GEMINATION MARK +16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM +16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK + +# Total code points: 44 + +# ================================================ + +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK + +# Total code points: 189 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 120 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] <noncharacter-FDD0>..<noncharacter-FDEF> +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFE>..<noncharacter-FFFF> +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-1FFFE>..<noncharacter-1FFFF> +2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-2FFFE>..<noncharacter-2FFFF> +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-3FFFE>..<noncharacter-3FFFF> +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-4FFFE>..<noncharacter-4FFFF> +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-5FFFE>..<noncharacter-5FFFF> +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-6FFFE>..<noncharacter-6FFFF> +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-7FFFE>..<noncharacter-7FFFF> +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-8FFFE>..<noncharacter-8FFFF> +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-9FFFE>..<noncharacter-9FFFF> +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-AFFFE>..<noncharacter-AFFFF> +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-BFFFE>..<noncharacter-BFFFF> +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-CFFFE>..<noncharacter-CFFFF> +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-DFFFE>..<noncharacter-DFFFF> +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-EFFFE>..<noncharacter-EFFFF> +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER +302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 +E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 125 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID + +# Total code points: 10 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FEA ; Unified_Ideograph # Lo [20971] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEA +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 + +# Total code points: 87882 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +2065 ; Other_Default_Ignorable_Code_Point # Cn <reserved-2065> +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] <reserved-FFF0>..<reserved-FFF8> +E0000 ; Other_Default_Ignorable_Code_Point # Cn <reserved-E0000> +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>..<reserved-E001F> +E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF> +E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> + +# Total code points: 3776 + +# ================================================ + +0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR +0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL +17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET +232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET +E0001 ; Deprecated # Cf LANGUAGE TAG + +# Total code points: 15 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J +1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J + +# Total code points: 46 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY +AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA +AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY + +# Total code points: 19 + +# ================================================ + +1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 6 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE + +# Total code points: 12 + +# ================================================ + +0021 ; Sentence_Terminal # Po EXCLAMATION MARK +002E ; Sentence_Terminal # Po FULL STOP +003F ; Sentence_Terminal # Po QUESTION MARK +0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP +061F ; Sentence_Terminal # Po ARABIC QUESTION MARK +06D4 ; Sentence_Terminal # Po ARABIC FULL STOP +0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK +0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP +1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP +1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP +1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL +1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK +2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP +3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP +A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP +A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK +A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP +A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK +A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA +A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI +AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI +FE52 ; Sentence_Terminal # Po SMALL FULL STOP +FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK +FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP +FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK +FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP +10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA +110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK +111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA +1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK +112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK +1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA +115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA +115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA +16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP +16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB +16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS +1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP + +# Total code points: 128 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 259 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] <control-0009>..<control-000D> +0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc <control-0085> +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Pattern_Syntax # Ps LEFT CEILING +2309 ; Pattern_Syntax # Pe RIGHT CEILING +230A ; Pattern_Syntax # Ps LEFT FLOOR +230B ; Pattern_Syntax # Pe RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F> +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F> +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B74..2B75 ; Pattern_Syntax # Cn [2] <reserved-2B74>..<reserved-2B75> +2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B96..2B97 ; Pattern_Syntax # Cn [2] <reserved-2B96>..<reserved-2B97> +2B98..2BB9 ; Pattern_Syntax # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX +2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC> +2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED +2BC9 ; Pattern_Syntax # Cn <reserved-2BC9> +2BCA..2BD2 ; Pattern_Syntax # So [9] TOP HALF BLACK CIRCLE..GROUP MARK +2BD3..2BEB ; Pattern_Syntax # Cn [25] <reserved-2BD3>..<reserved-2BEB> +2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS +2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF> +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS +2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET +2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET +2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Pattern_Syntax # Lm VERTICAL TILDE +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN +2E41 ; Pattern_Syntax # Po REVERSED COMMA +2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E49 ; Pattern_Syntax # Po [7] DASH WITH LEFT UPTURN..DOUBLE STACKED COMMA +2E4A..2E7F ; Pattern_Syntax # Cn [54] <reserved-2E4A>..<reserved-2E7F> +3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 + +# ================================================ + +0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH +070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK +08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH +110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN + +# Total code points: 10 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# EOF diff --git a/localedata/unicode-gen/utf8_gen.py b/localedata/unicode-gen/utf8_gen.py index 52c79e83c1..26939e25a8 100755 --- a/localedata/unicode-gen/utf8_gen.py +++ b/localedata/unicode-gen/utf8_gen.py @@ -215,9 +215,11 @@ def write_header_width(outfile): # outfile.write("% \"grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt\"\n") outfile.write("WIDTH\n") -def process_width(outfile, ulines, elines): +def process_width(outfile, ulines, elines, plines): '''ulines are lines from UnicodeData.txt, elines are lines from - EastAsianWidth.txt + EastAsianWidth.txt containing characters with width “W” or “F”, + plines are lines from PropList.txt which contain characters + with the property “Prepended_Concatenation_Mark”. ''' width_dict = {} @@ -230,16 +232,29 @@ def process_width(outfile, ulines, elines): for key in range(int(code_points[0], 16), int(code_points[1], 16)+1): width_dict[key] = 2 + for line in ulines: fields = line.split(";") if fields[4] == "NSM" or fields[2] in ("Cf", "Me", "Mn"): width_dict[int(fields[0], 16)] = 0 + for line in plines: + # Characters with the property “Prepended_Concatenation_Mark” + # should have the width 1: + fields = line.split(";") + if not '..' in fields[0]: + code_points = (fields[0], fields[0]) + else: + code_points = fields[0].split("..") + for key in range(int(code_points[0], 16), + int(code_points[1], 16)+1): + del width_dict[key] # default width is 1 + # handle special cases for compatibility for key in list((0x00AD,)): # https://www.cs.tut.fi/~jkorpela/shy.html if key in width_dict: - del width_dict[key] + del width_dict[key] # default width is 1 for key in list(range(0x1160, 0x1200)): width_dict[key] = 0 for key in list(range(0x3248, 0x3250)): @@ -278,7 +293,7 @@ def process_width(outfile, ulines, elines): if __name__ == "__main__": if len(sys.argv) < 3: - print("USAGE: python3 utf8_gen.py UnicodeData.txt EastAsianWidth.txt") + print("USAGE: python3 utf8_gen.py UnicodeData.txt EastAsianWidth.txt PropList.txt") else: with open(sys.argv[1], mode='r') as UNIDATA_FILE: UNICODE_DATA_LINES = UNIDATA_FILE.readlines() @@ -298,6 +313,11 @@ if __name__ == "__main__": continue if re.match(r'^[^;]*;[WF]', LINE): EAST_ASIAN_WIDTH_LINES.append(LINE.strip()) + with open(sys.argv[3], mode='r') as PROP_LIST_FILE: + PROP_LIST_LINES = [] + for LINE in PROP_LIST_FILE: + if re.match(r'^[^;]*;[\s]*Prepended_Concatenation_Mark', LINE): + PROP_LIST_LINES.append(LINE.strip()) with open('UTF-8', mode='w') as OUTFILE: # Processing UnicodeData.txt and write CHARMAP to UTF-8 file write_header_charmap(OUTFILE) @@ -305,5 +325,8 @@ if __name__ == "__main__": OUTFILE.write("END CHARMAP\n\n") # Processing EastAsianWidth.txt and write WIDTH to UTF-8 file write_header_width(OUTFILE) - process_width(OUTFILE, UNICODE_DATA_LINES, EAST_ASIAN_WIDTH_LINES) + process_width(OUTFILE, + UNICODE_DATA_LINES, + EAST_ASIAN_WIDTH_LINES, + PROP_LIST_LINES) OUTFILE.write("END WIDTH\n") |