From a0dc52061fd8d47c37bbd363533ae0a4307e507c Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 7 Apr 2000 02:38:44 +0000 Subject: Update. 2000-04-06 Ulrich Drepper * locale/programs/charmap.c (charmap_new_char): Add parameter step. Support ..(2).. ellipsis. (parse_charmap): Recognize ..(2).. etc and pass step down. Correctly generate names for UCS4 characters. * locale/programs/ld-ctype.c (struct translit_ignore_t): Add step. (ctype_finish): We know the wide character value for <SP>, don't search. (charclass_symbolic_ellipsis): Handle ..(2).. ellipsis. (charclass_ucs4_ellipsis): Likewise. (read_translit_ignore_entry): Store ellipsis step. (ctype_read): Recognize ..(2).. etc and pass step down. * locale/programs/linereader.c (lr_token): When seeing comment character ignore only rest of line in sources but stop at escaped newline. Recognize ..(2).. and ....(2)..... * locale/programs/locfile-token.h (enum token_t): Add tok_ellipsis2_2 and tok_ellipsis4_2. --- locale/programs/charmap.c | 32 +++++++++++++------ locale/programs/ld-ctype.c | 69 +++++++++++++++++++++++++++-------------- locale/programs/linereader.c | 26 ++++++++++++++++ locale/programs/locfile-token.h | 4 ++- 4 files changed, 98 insertions(+), 33 deletions(-) (limited to 'locale') diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c index 37047a0407..3ccebcac48 100644 --- a/locale/programs/charmap.c +++ b/locale/programs/charmap.c @@ -53,7 +53,7 @@ static void new_width (struct linereader *cmfile, struct charmap_t *result, unsigned long int width); static void charmap_new_char (struct linereader *lr, struct charmap_t *cm, int nbytes, char *bytes, const char *from, - const char *to, int decimal_ellipsis); + const char *to, int decimal_ellipsis, int step); struct charmap_t * @@ -225,6 +225,7 @@ parse_charmap (struct linereader *cmfile) char *from_name = NULL; char *to_name = NULL; enum token_t ellipsis = 0; + int step = 1; /* We don't want symbolic names in string to be translated. 
*/ cmfile->translate_strings = 0; @@ -461,7 +462,7 @@ character sets with locking states are not supported")); now->val.str.lenmb); else { - obstack_printf (&result->mem_pool, "<%08X>", + obstack_printf (&result->mem_pool, "U%08X", cmfile->token.val.ucs4); obstack_1grow (&result->mem_pool, '\0'); from_name = (char *) obstack_finish (&result->mem_pool); @@ -475,9 +476,20 @@ character sets with locking states are not supported")); /* We have two possibilities: We can see an ellipsis or an encoding value. */ if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 - || nowtok == tok_ellipsis2) + || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2 + || nowtok == tok_ellipsis2_2) { ellipsis = nowtok; + if (nowtok == tok_ellipsis4_2) + { + step = 2; + nowtok = tok_ellipsis4; + } + else if (nowtok == tok_ellipsis2_2) + { + step = 2; + nowtok = tok_ellipsis2; + } state = 4; continue; } @@ -502,13 +514,15 @@ character sets with locking states are not supported")); else charmap_new_char (cmfile, result, now->val.charcode.nbytes, now->val.charcode.bytes, from_name, to_name, - ellipsis != tok_ellipsis2); + ellipsis != tok_ellipsis2, step); /* Ignore trailing comment silently. 
*/ lr_ignore_rest (cmfile, 0); from_name = NULL; to_name = NULL; + ellipsis = tok_none; + step = 1; state = 2; continue; @@ -531,7 +545,7 @@ character sets with locking states are not supported")); cmfile->token.val.str.lenmb); else { - obstack_printf (&result->mem_pool, "<%08X>", + obstack_printf (&result->mem_pool, "U%08X", cmfile->token.val.ucs4); obstack_1grow (&result->mem_pool, '\0'); to_name = (char *) obstack_finish (&result->mem_pool); @@ -814,7 +828,7 @@ charmap_find_value (const struct charmap_t *cm, const char *name, size_t len) static void charmap_new_char (struct linereader *lr, struct charmap_t *cm, int nbytes, char *bytes, const char *from, const char *to, - int decimal_ellipsis) + int decimal_ellipsis, int step) { hash_table *ht = &cm->char_table; hash_table *bt = &cm->byte_table; @@ -833,7 +847,7 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm, newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); newp->nbytes = nbytes; memcpy (newp->bytes, bytes, nbytes); - newp->name = obstack_copy (ob, from, len1 + 1); + newp->name = from; newp->ucs4 = UNINITIALIZED_CHAR_VALUE; if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9)) @@ -852,7 +866,7 @@ charmap_new_char (struct linereader *lr, struct charmap_t *cm, char *endp; errno = 0; - newp->ucs4 = strtoul (from, &endp, 16); + newp->ucs4 = strtoul (from + 1, &endp, 16); if (endp - from != len1 || (newp->ucs4 == ULONG_MAX && errno == ERANGE) || newp->ucs4 >= 0x80000000) @@ -916,7 +930,7 @@ hexadecimal range format should use only capital characters")); return; } - for (cnt = from_nr; cnt <= to_nr; ++cnt) + for (cnt = from_nr; cnt <= to_nr; cnt += step) { char *name_end; obstack_printf (ob, decimal_ellipsis ? 
"%.*s%0*d" : "%.*s%0*X", diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 23ca2368c8..9cf4d2c5f5 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -97,6 +97,7 @@ struct translit_ignore_t { uint32_t from; uint32_t to; + uint32_t step; const char *fname; size_t lineno; @@ -504,18 +505,13 @@ character '%s' in class `%s' must not be in class `%s'"), } /* ... and now test as a special case. */ - space_value = repertoire_find_value (ctype->repertoire, "SP", 2); - if (space_value == ILLEGAL_CHAR_VALUE) - { - if (!be_quiet) - error (0, 0, _("character not defined in character map")); - } - else if (((cnt = BITPOS (tok_space), - (ELEM (ctype, class_collection, , space_value) - & BITw (tok_space)) == 0) - || (cnt = BITPOS (tok_blank), - (ELEM (ctype, class_collection, , space_value) - & BITw (tok_blank)) == 0))) + space_value = 32; + if (((cnt = BITPOS (tok_space), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_blank)) == 0))) { if (!be_quiet) error (0, 0, _(" character not in class `%s'"), @@ -1236,7 +1232,8 @@ get_character (struct token *now, struct charmap_t *charmap, } -/* Ellipsis like in `..' or `....'. */ +/* Ellipsis like in `..' or `....' and + the .(2). counterparts. 
*/ static void charclass_symbolic_ellipsis (struct linereader *ldfile, struct locale_ctype_t *ctype, @@ -1246,7 +1243,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile, const char *last_str, unsigned long int class256_bit, unsigned long int class_bit, int base, - int ignore_content, int handle_digits) + int ignore_content, int handle_digits, int step) { const char *nowstr = now->val.str.startmb; char tmp[now->val.str.lenmb + 1]; @@ -1288,7 +1285,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile, if (!ignore_content) { now->val.str.startmb = tmp; - while (++from <= to) + while ((from += step) <= to) { struct charseq *seq; uint32_t wch; @@ -1346,7 +1343,7 @@ charclass_symbolic_ellipsis (struct linereader *ldfile, } -/* Ellipsis like in `..'. */ +/* Ellipsis like in `..' or `..(2)..'. */ static void charclass_ucs4_ellipsis (struct linereader *ldfile, struct locale_ctype_t *ctype, @@ -1355,7 +1352,7 @@ charclass_ucs4_ellipsis (struct linereader *ldfile, struct token *now, uint32_t last_wch, unsigned long int class256_bit, unsigned long int class_bit, int ignore_content, - int handle_digits) + int handle_digits, int step) { if (last_wch > now->val.ucs4) { @@ -1367,7 +1364,7 @@ to-value of range is smaller than from-value "), } if (!ignore_content) - while (++last_wch <= now->val.ucs4) + while ((last_wch += step) <= now->val.ucs4) { /* We have to find out whether there is a byte sequence corresponding to this UCS4 value. */ @@ -1376,6 +1373,11 @@ to-value of range is smaller than from-value "), snprintf (utmp, sizeof (utmp), "U%08X", last_wch); seq = charmap_find_value (charmap, utmp, 9); + if (seq == NULL) + { + snprintf (utmp, sizeof (utmp), "U%04X", last_wch); + seq = charmap_find_value (charmap, utmp, 5); + } if (seq == NULL) /* Try looking in the repertoire map. 
*/ @@ -1779,6 +1781,7 @@ read_translit_ignore_entry (struct linereader *ldfile, obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t)); newp->from = from; newp->to = from; + newp->step = 1; newp->next = ctype->translit_ignore; ctype->translit_ignore = newp; @@ -1788,11 +1791,12 @@ read_translit_ignore_entry (struct linereader *ldfile, line. */ now = lr_token (ldfile, charmap, repertoire); - if (now->tok == tok_ellipsis2) + if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2) { /* XXX Should we bother implementing `....'? `...' certainly will not be implemented. */ uint32_t to; + int step = now->tok == tok_ellipsis2_2 ? 2 : 1; now = lr_token (ldfile, charmap, repertoire); @@ -1823,7 +1827,10 @@ read_translit_ignore_entry (struct linereader *ldfile, { /* Make sure the `to'-value is larger. */ if (to >= from) - newp->to = to; + { + newp->to = to; + newp->step = step; + } else lr_error (ldfile, _("\ to-value of range is smaller than from-value "), @@ -1866,6 +1873,7 @@ ctype_read (struct linereader *ldfile, struct localedef_t *result, uint32_t last_wch = 0; enum token_t last_token; enum token_t ellipsis_token; + int step; char last_charcode[16]; size_t last_charcode_len = 0; const char *last_str = NULL; @@ -2040,6 +2048,7 @@ ctype_read (struct linereader *ldfile, struct localedef_t *result, ctype->class_done |= class_bit; last_token = tok_none; ellipsis_token = tok_none; + step = 1; now = lr_token (ldfile, charmap, NULL); while (now->tok != tok_eol && now->tok != tok_eof) { @@ -2140,7 +2149,7 @@ the absolute ellipsis `...' must not be used")); == tok_ellipsis4 ? 
10 : 16), ignore_content, - handle_digits); + handle_digits, step); } else if (last_token == tok_ucs4) { @@ -2151,7 +2160,8 @@ with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); charclass_ucs4_ellipsis (ldfile, ctype, charmap, repertoire, now, last_wch, class256_bit, class_bit, - ignore_content, handle_digits); + ignore_content, handle_digits, + step); } else { @@ -2180,9 +2190,21 @@ with character code range values one must use the absolute ellipsis `...'")); break; if (last_token != tok_none - && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4) + && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2) { + if (now->tok == tok_ellipsis2_2) + { + now->tok = tok_ellipsis2; + step = 2; + } + else if (now->tok == tok_ellipsis4_2) + { + now->tok = tok_ellipsis4; + step = 2; + } + ellipsis_token = now->tok; + now = lr_token (ldfile, charmap, NULL); continue; } @@ -2194,6 +2216,7 @@ with character code range values one must use the absolute ellipsis `...'")); now = lr_token (ldfile, charmap, NULL); ellipsis_token = tok_none; + step = 1; } break; diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c index f6532a4792..36dd0cd2d0 100644 --- a/locale/programs/linereader.c +++ b/locale/programs/linereader.c @@ -185,6 +185,16 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap, if (ch != lr->comment_char) break; + /* Is there an newline at the end of the buffer? */ + if (lr->buf[lr->bufact - 1] != '\n') + { + /* No. Some people want this to mean that only the line in + the file not the logical, concatenated line is ignored. + Let's try this. */ + lr->idx = lr->bufact; + continue; + } + /* Ignore rest of line. */ lr_ignore_rest (lr, 0); lr->token.tok = tok_eol; @@ -198,6 +208,14 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap, /* Match ellipsis. 
*/ if (ch == '.') { + if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0) + { + int cnt; + for (cnt = 0; cnt < 10; ++cnt) + lr_getc (lr); + lr->token.tok = tok_ellipsis4_2; + return &lr->token; + } if (strncmp (&lr->buf[lr->idx], "...", 3) == 0) { lr_getc (lr); @@ -213,6 +231,14 @@ lr_token (struct linereader *lr, const struct charmap_t *charmap, lr->token.tok = tok_ellipsis3; return &lr->token; } + if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0) + { + int cnt; + for (cnt = 0; cnt < 6; ++cnt) + lr_getc (lr); + lr->token.tok = tok_ellipsis2_2; + return &lr->token; + } if (lr->buf[lr->idx] == '.') { lr_getc (lr); diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h index e1cd5f7a74..6eecc5618a 100644 --- a/locale/programs/locfile-token.h +++ b/locale/programs/locfile-token.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1996. @@ -31,6 +31,8 @@ enum token_t tok_ellipsis2, tok_ellipsis3, tok_ellipsis4, + tok_ellipsis2_2, + tok_ellipsis4_2, tok_semicolon, tok_comma, tok_open_brace, -- cgit v1.2.3-70-g09d2