From 827ff7580a99943f22c2d71fd95c67d86bb99ad7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 17 Nov 1999 23:06:37 +0000 Subject: Update. 1999-11-17 Ulrich Drepper * locale/programs/ld-ctype.c: Implement character width information handling. --- locale/programs/ld-ctype.c | 126 ++++++++++++++++++++++++++++++++++++--------- locale/programs/locfile.h | 2 +- 2 files changed, 103 insertions(+), 25 deletions(-) (limited to 'locale/programs') diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 2f99cf8dfc..5d88fd0509 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -527,16 +527,54 @@ character '%s' in class `%s' must not be in class `%s'"), if (charmap->width_rules != NULL) for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) { -#if 0 - size_t inner; - for (inner = charmap->width_rules[cnt].from; - inner <= charmap->width_rules[cnt].to; ++inner) - (void) find_idx (ctype, NULL, NULL, NULL, inner); -#else - /* XXX Handle width. We must convert from the charseq to the - repertoire value */ - abort (); -#endif + unsigned char bytes[charmap->mb_cur_max]; + int nbytes = charmap->width_rules[cnt].from->nbytes; + + /* We have the range of character for which the width is + specified described using byte sequences of the multibyte + charset. We have to convert this to UCS4 now. And we + cannot simply convert the beginning and the end of the + sequence, we have to iterate over the byte sequence and + convert it for every single character. */ + memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); + + while (nbytes < charmap->width_rules[cnt].to->nbytes + || memcmp (bytes, charmap->width_rules[cnt].to->bytes, + nbytes) <= 0) + { + /* Find the UCS value for `bytes'. */ + uint32_t wch = repertoire_find_value (ctype->repertoire, bytes, + nbytes); + int inner; + + if (wch != ILLEGAL_CHAR_VALUE) + /* We are only interested in the side-effects of the + `find_idx' call. It will add appropriate entries in + the name array if this is necessary. */ + (void) find_idx (ctype, NULL, NULL, NULL, wch); + + /* "Increment" the bytes sequence. */ + inner = nbytes - 1; + while (inner >= 0 && bytes[inner] == 0xff) + --inner; + + if (inner < 0) + { + /* We have to extend the byte sequence. */ + if (nbytes >= charmap->width_rules[cnt].to->nbytes) + break; + + bytes[0] = 1; + memset (&bytes[1], 0, nbytes); + ++nbytes; + } + else + { + ++bytes[inner]; + while (++inner < nbytes) + bytes[inner] = 0; + } + } } /* There must be a multiple of 10 digits. */ @@ -2973,27 +3011,67 @@ Computing table size for character classes might take a while..."), ctype->plane_size * ctype->plane_cnt); if (charmap->width_rules != NULL) { -#if 0 size_t cnt; for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) - if (charmap->width_rules[cnt].width != charmap->width_default) - for (idx = charmap->width_rules[cnt].from; - idx <= charmap->width_rules[cnt].to; ++idx) + { + unsigned char bytes[charmap->mb_cur_max]; + int nbytes = charmap->width_rules[cnt].from->nbytes; + + /* We have the range of character for which the width is + specified described using byte sequences of the multibyte + charset. We have to convert this to UCS4 now. And we + cannot simply convert the beginning and the end of the + sequence, we have to iterate over the byte sequence and + convert it for every single character. */ + memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); + + while (nbytes < charmap->width_rules[cnt].to->nbytes + || memcmp (bytes, charmap->width_rules[cnt].to->bytes, + nbytes) <= 0) { - size_t nr = idx % ctype->plane_size; - size_t depth = 0; + /* Find the UCS value for `bytes'. */ + uint32_t wch = repertoire_find_value (ctype->repertoire, bytes, + nbytes); + int inner; + + if (wch != ILLEGAL_CHAR_VALUE) + { + /* Store the value. */ + size_t nr = idx % ctype->plane_size; + size_t depth = 0; + + while (ctype->names[nr + depth * ctype->plane_size] != nr) + ++depth; + assert (depth < ctype->plane_cnt); + + ctype->width[nr + depth * ctype->plane_size] + = charmap->width_rules[cnt].width; + } + + /* "Increment" the bytes sequence. */ + inner = nbytes - 1; + while (inner >= 0 && bytes[inner] == 0xff) + --inner; - while (ctype->names[nr + depth * ctype->plane_size] != nr) - ++depth; - assert (depth < ctype->plane_cnt); + if (inner < 0) + { + /* We have to extend the byte sequence. */ + if (nbytes >= charmap->width_rules[cnt].to->nbytes) + break; - ctype->width[nr + depth * ctype->plane_size] - = charmap->width_rules[cnt].width; + bytes[0] = 1; + memset (&bytes[1], 0, nbytes); + ++nbytes; + } + else + { + ++bytes[inner]; + while (++inner < nbytes) + bytes[inner] = 0; + } } -#else - abort (); -#endif + } } /* Set MB_CUR_MAX. */ diff --git a/locale/programs/locfile.h b/locale/programs/locfile.h index 8117259502..50a1d7709d 100644 --- a/locale/programs/locfile.h +++ b/locale/programs/locfile.h @@ -167,7 +167,7 @@ extern void numeric_read (struct linereader *ldfile, const char *repertoire_name, int ignore_content); extern void numeric_finish (struct localedef_t *locale, - struct charmap_t *charmap); + struct charmap_t *charmap); extern void numeric_output (struct localedef_t *locale, struct charmap_t *charmap, const char *output_path); -- cgit v1.2.3-70-g09d2