diff options
author | Ulrich Drepper <drepper@redhat.com> | 2000-10-05 02:01:22 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2000-10-05 02:01:22 +0000 |
commit | f00f95d188d8b0b8adf6eaff849eba59fe5b5f59 (patch) | |
tree | 006b382527502c8227b0e6552ab389307303ef5f /localedata | |
parent | 84ca314775458b3c80d4ce1763c4517a5e0d3b1a (diff) | |
download | glibc-f00f95d188d8b0b8adf6eaff849eba59fe5b5f59.tar glibc-f00f95d188d8b0b8adf6eaff849eba59fe5b5f59.tar.gz glibc-f00f95d188d8b0b8adf6eaff849eba59fe5b5f59.tar.bz2 glibc-f00f95d188d8b0b8adf6eaff849eba59fe5b5f59.zip |
Update.
* locales/zh_CN: Remove non-ASCII digit entry.
* locales/ja_JP: Likewise.
2000-09-30 Bruno Haible <haible@clisp.cons.org>
* gen-unicode-ctype.c (is_digit, is_xdigit): Remove all non-ASCII
digits.
(is_alpha): Add them here.
* locales/i18n (digit): Remove all non-ASCII digits.
(alpha): Add them here.
* dump-ctype.c: Pretty print.
2000-10-04 Ulrich Drepper <drepper@redhat.com>
Diffstat (limited to 'localedata')
-rw-r--r-- | localedata/ChangeLog | 15 | ||||
-rw-r--r-- | localedata/dump-ctype.c | 3 | ||||
-rw-r--r-- | localedata/gen-unicode-ctype.c | 94 | ||||
-rw-r--r-- | localedata/locales/i18n | 18 | ||||
-rw-r--r-- | localedata/locales/ja_JP | 4 | ||||
-rw-r--r-- | localedata/locales/zh_CN | 4 |
6 files changed, 93 insertions, 45 deletions
diff --git a/localedata/ChangeLog b/localedata/ChangeLog index 30cbff3ff1..296a5ba330 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,5 +1,20 @@ 2000-10-04 Ulrich Drepper <drepper@redhat.com> + * locales/zh_CN: Remove non-ASCII digit entry. + * locales/ja_JP: Likewise. + +2000-09-30 Bruno Haible <haible@clisp.cons.org> + + * gen-unicode-ctype.c (is_digit, is_xdigit): Remove all non-ASCII + digits. + (is_alpha): Add them here. + * locales/i18n (digit): Remove all non-ASCII digits. + (alpha): Add them here. + + * dump-ctype.c: Pretty print. + +2000-10-04 Ulrich Drepper <drepper@redhat.com> + * charmaps/EUC-KR: Change \x5c mapping to U20a9. * locales/ko_KR: Change currency_symbol back to use U20a9. diff --git a/localedata/dump-ctype.c b/localedata/dump-ctype.c index 9514ca7654..66d539a412 100644 --- a/localedata/dump-ctype.c +++ b/localedata/dump-ctype.c @@ -113,7 +113,8 @@ static void dump_map (const char *map_name) } } -int main (int argc, char *argv[]) +int +main (int argc, char *argv[]) { size_t i; diff --git a/localedata/gen-unicode-ctype.c b/localedata/gen-unicode-ctype.c index b45e70351a..1259aef872 100644 --- a/localedata/gen-unicode-ctype.c +++ b/localedata/gen-unicode-ctype.c @@ -166,21 +166,21 @@ fill_attributes (const char *unicodedata_filename) int n; lineno++; - n = getfield(stream, field0, ';'); - n += getfield(stream, field1, ';'); - n += getfield(stream, field2, ';'); - n += getfield(stream, field3, ';'); - n += getfield(stream, field4, ';'); - n += getfield(stream, field5, ';'); - n += getfield(stream, field6, ';'); - n += getfield(stream, field7, ';'); - n += getfield(stream, field8, ';'); - n += getfield(stream, field9, ';'); - n += getfield(stream, field10, ';'); - n += getfield(stream, field11, ';'); - n += getfield(stream, field12, ';'); - n += getfield(stream, field13, ';'); - n += getfield(stream, field14, '\n'); + n = getfield (stream, field0, ';'); + n += getfield (stream, field1, ';'); + n += getfield (stream, field2, ';'); + n += getfield (stream, field3, ';'); + n += getfield (stream, field4, ';'); + n += getfield (stream, field5, ';'); + n += getfield (stream, field6, ';'); + n += getfield (stream, field7, ';'); + n += getfield (stream, field8, ';'); + n += getfield (stream, field9, ';'); + n += getfield (stream, field10, ';'); + n += getfield (stream, field11, ';'); + n += getfield (stream, field12, ';'); + n += getfield (stream, field13, ';'); + n += getfield (stream, field14, '\n'); if (n == 0) break; if (n != 15) @@ -196,21 +196,21 @@ fill_attributes (const char *unicodedata_filename) { /* Deal with a range. */ lineno++; - n = getfield(stream, field0, ';'); - n += getfield(stream, field1, ';'); - n += getfield(stream, field2, ';'); - n += getfield(stream, field3, ';'); - n += getfield(stream, field4, ';'); - n += getfield(stream, field5, ';'); - n += getfield(stream, field6, ';'); - n += getfield(stream, field7, ';'); - n += getfield(stream, field8, ';'); - n += getfield(stream, field9, ';'); - n += getfield(stream, field10, ';'); - n += getfield(stream, field11, ';'); - n += getfield(stream, field12, ';'); - n += getfield(stream, field13, ';'); - n += getfield(stream, field14, '\n'); + n = getfield (stream, field0, ';'); + n += getfield (stream, field1, ';'); + n += getfield (stream, field2, ';'); + n += getfield (stream, field3, ';'); + n += getfield (stream, field4, ';'); + n += getfield (stream, field5, ';'); + n += getfield (stream, field6, ';'); + n += getfield (stream, field7, ';'); + n += getfield (stream, field8, ';'); + n += getfield (stream, field9, ';'); + n += getfield (stream, field10, ';'); + n += getfield (stream, field11, ';'); + n += getfield (stream, field12, ';'); + n += getfield (stream, field13, ';'); + n += getfield (stream, field14, '\n'); if (n != 15) { fprintf (stderr, "missing end range in '%s':%d\n", @@ -390,17 +390,35 @@ is_alpha (unsigned int ch) || (unicode_attributes[ch].category[0] == 'S' && unicode_attributes[ch].category[1] == 'o' && strstr (unicode_attributes[ch].name, " LETTER ") - != NULL))); + != NULL) + /* Consider all the non-ASCII digits as alphabetic. + ISO C 99 forbids us to have them in category "digit", + but we want iswalnum to return true on them. */ + || (unicode_attributes[ch].category[0] == 'N' + && unicode_attributes[ch].category[1] == 'd' + && !(ch >= 0x0030 && ch <= 0x0039)))); } static bool is_digit (unsigned int ch) { +#if 0 return (unicode_attributes[ch].name != NULL && unicode_attributes[ch].category[0] == 'N' && unicode_attributes[ch].category[1] == 'd'); /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without a zero. Must add <0> in front of them by hand. */ +#else + /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99 + takes it away: + 7.25.2.1.5: + The iswdigit function tests for any wide character that corresponds + to a decimal-digit character (as defined in 5.2.1). + 5.2.1: + the 10 decimal digits 0 1 2 3 4 5 6 7 8 9 + */ + return (ch >= 0x0030 && ch <= 0x0039); +#endif } static bool @@ -455,9 +473,23 @@ is_cntrl (unsigned int ch) static bool is_xdigit (unsigned int ch) { +#if 0 return is_digit (ch) || (ch >= 0x0041 && ch <= 0x0046) || (ch >= 0x0061 && ch <= 0x0066); +#else + /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99 + takes it away: + 7.25.2.1.12: + The iswxdigit function tests for any wide character that corresponds + to a hexadecimal-digit character (as defined in 6.4.4.1). + 6.4.4.1: + hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F + */ + return (ch >= 0x0030 && ch <= 0x0039) + || (ch >= 0x0041 && ch <= 0x0046) + || (ch >= 0x0061 && ch <= 0x0066); +#endif } static bool diff --git a/localedata/locales/i18n b/localedata/locales/i18n index b284ded6c0..977c58536e 100644 --- a/localedata/locales/i18n +++ b/localedata/locales/i18n @@ -305,13 +305,10 @@ alpha / <UFE70>..<UFE72>;<UFE74>;<UFE76>..<UFEFC>;/ % HALFWIDTH AND FULLWIDTH FORMS/ <UFF21>..<UFF3A>;<UFF41>..<UFF5A>;<UFF66>..<UFFBE>;<UFFC2>..<UFFC7>;/ - <UFFCA>..<UFFCF>;<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC> - -% The "digit" class of the "i18n" FDCC-set is reflecting -% the recommendations in TR 10176 annex A -digit / -% TABLE 1 BASIC LATIN/ - <U0030>..<U0039>;/ + <UFFCA>..<UFFCF>;<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>;/ +% The non-ASCII number characters are included here because ISO C 99 / +% forbids us to classify them as digits; however, they behave more like / +% alphanumeric than like punctuation. / % TABLE 15 and 16 ARABIC/ <U0660>..<U0669>;<U06F0>..<U06F9>;/ % TABLE 17 DEVANAGARI/ @@ -349,6 +346,11 @@ digit / % HALFWIDTH AND FULLWIDTH FORMS/ <UFF10>..<UFF19> +% The "digit" class must only contain the BASIC LATIN digits, says ISO C 99 +% (sections 7.25.2.1.5 and 5.2.1). +digit / + <U0030>..<U0039> + outdigit <U0030>..<U0039> space / @@ -602,6 +604,8 @@ print / <UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>;<UFFE0>..<UFFE6>;<UFFE8>..<UFFEE>;/ <UFFF9>..<UFFFD> +% The "xdigit" class must only contain the BASIC LATIN digits and A-F, a-f, +% says ISO C 99 (sections 7.25.2.1.12 and 6.4.4.1). xdigit / <U0030>..<U0039>;<U0041>..<U0046>;<U0061>..<U0066> diff --git a/localedata/locales/ja_JP b/localedata/locales/ja_JP index 4c4d2b6746..826247f490 100644 --- a/localedata/locales/ja_JP +++ b/localedata/locales/ja_JP @@ -155,9 +155,7 @@ alpha <U0041>;<U0042>;<U0043>;<U0044>;<U0045>;<U0046>;<U0047>;<U0048>;/ <U0175>;<U00FD>;<U00FF>;<U0177>;<U017A>;<U017E>;<U017C> digit <U0030>;<U0031>;<U0032>;<U0033>;<U0034>;/ - <U0035>;<U0036>;<U0037>;<U0038>;<U0039>;/ - <UFF10>;<UFF11>;<UFF12>;<UFF13>;<UFF14>;/ - <UFF15>;<UFF16>;<UFF17>;<UFF18>;<UFF19> + <U0035>;<U0036>;<U0037>;<U0038>;<U0039> xdigit <U0030>;<U0031>;<U0032>;<U0033>;<U0034>;/ <U0035>;<U0036>;<U0037>;<U0038>;<U0039>;/ diff --git a/localedata/locales/zh_CN b/localedata/locales/zh_CN index 7c98bc9f8d..278dd231ed 100644 --- a/localedata/locales/zh_CN +++ b/localedata/locales/zh_CN @@ -90,9 +90,7 @@ alpha <U0041>..<U005A>;<U0061>..<U007A>;/ % Bopomofo/ <U3105>..<U3129> -digit <U0030>..<U0039>;/ -% Fullwidth digits/ - <UFF10>..<UFF19> +digit <U0030>..<U0039> outdigit <U0030>..<U0039> |