From 4b10dd6c1959577f57850ca427a94fe22b9f3299 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Tue, 31 Aug 1999 07:04:41 +0000 Subject: Update. * locale/Makefile (distribute): Add iso-639.def and iso-3166.def. Change charset.h to charmap.h. (categories): Add new categories. Leave out collate for now. Update build rules. * locale/categories.def: Add definitions for new categories. * locale/langinfo.h: Likewise. * locale/locale.h: Likewise. * locale/C-address.c: New file. * locale/C-identification.c: New file. * locale/C-measurement.c: New file. * locale/C-name.c: New file. * locale/C-paper.c: New file. * locale/C-telephone.c: New file. * locale/lc-address.c: Likewise. * locale/lc-identification.c: Likewise. * locale/lc-measurement.c: Likewise. * locale/lc-name.c: Likewise. * locale/lc-paper.c: Likewise. * locale/lc-telephone.c: Likewise. * locale/C-ctype.c: Update for locale rewrite. * locale/C-messages.c: Likewise. * locale/C-monetary.c: Likewise. * locale/C-time.c: Likewise. * locale/lc-collate.c: Likewise. * locale/lc-ctype.c: Likewise. * locale/lc-monetary.c: Likewise. * locale/lc-time.c: Likewise. * locale/localeinfo.h: Likewise. * locale/newlocale.c: Likewise. * locale/setlocale.c: Likewise. * locale/weight.h: Likewise. * locale/findlocale.c: Unconditionally use mmap. Handle new categories. * locale/loadlocale.c: Likewise. * locale/iso-3166.def: New file. * locale/iso-639.def: New file. * locale/programs/charmap-kw.gperf: Add new keywords. * locale/programs/locfile-kw.gperf: Likewise. * locale/programs/locfile-token.h: Define new tokens. * locale/programs/charmap.c: Rewrite to handle multibyte charsets. * locale/programs/charmap.h: New file. * locale/programs/charset.h: Removed. * locale/programs/config.h: Add __LC_LAST. * locale/programs/lc-address.c: New file. * locale/programs/lc-identification.c: New file. * locale/programs/lc-measurement.c: New file. * locale/programs/lc-name.c: New file. * locale/programs/lc-paper.c: New file. * locale/programs/lc-telephone.c: New file. * locale/programs/lc-collate.c: Update for locale rewrite. * locale/programs/lc-ctype.c: Likewise. * locale/programs/lc-messages.c: Likewise. * locale/programs/lc-monetary.c: Likewise. * locale/programs/lc-numeric.c: Likewise. * locale/programs/lc-time.c: Likewise. * locale/programs/locale.c: Likewise. * locale/programs/localedef.c: Likewise. * locale/programs/locfile.c: Likewise. * locale/programs/repertoire.c: Likewise. * locale/programs/repertoire.h: Likewise. * locale/programs/locfile.c: Update prototypes. Update handle_copy definition. * locale/programs/linereader.c: Add handling of wide char strings and new definition file syntax. * locale/programs/linereader.h (struct token): Add elements for wide character strings. * locale/programs/locale-spec.c: Disable handling of collation elements for now. * locale/programs/simple-hash.h: Cleanup. * locale/programs/stringtrans.h: Handle quite of end of line. * string/strcoll.c: Fall back on strcmp for now. * string/strxfrm.c: Fall back on strncpy/strlen for now. * time/strftime.c: Use new wide character data for wcsftime. * time/strptime.c: Remove _nl_C_LC_TIME declaration. * wctype/cname-lookup.h: Update for new LC_CTYPE data. --- locale/programs/ld-ctype.c | 3044 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 2410 insertions(+), 634 deletions(-) (limited to 'locale/programs/ld-ctype.c') diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 714a71898b..6743c1837c 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -1,6 +1,6 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Ulrich Drepper , 1995. + Contributed by Ulrich Drepper , 1995. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -22,183 +22,274 @@ #endif #include +#include #include +#include #include +#include +#include #include -#include +#include +#include +#include -#include "locales.h" +#include "charmap.h" #include "localeinfo.h" #include "langinfo.h" +#include "linereader.h" #include "locfile-token.h" -#include "stringtrans.h" +#include "locfile.h" +#include "localedef.h" -/* Uncomment the following line in the production version. */ -/* define NDEBUG 1 */ #include -void *xmalloc (size_t __n); -void *xcalloc (size_t __n, size_t __s); -void *xrealloc (void *__ptr, size_t __n); +/* These are the extra bits not in wctype.h since these are not preallocated + classes. */ +#define _ISwspecial1 (1 << 29) +#define _ISwspecial2 (1 << 30) +#define _ISwspecial3 (1 << 31) /* The bit used for representing a special class. */ #define BITPOS(class) ((class) - tok_upper) -#define BIT(class) (1 << BITPOS (class)) +#define BIT(class) (_ISbit (BITPOS (class))) +#define BITw(class) (_ISwbit (BITPOS (class))) #define ELEM(ctype, collection, idx, value) \ *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ &ctype->collection##_act idx, value) -#define SWAPU32(w) \ - (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) - -#define SWAPU16(w) \ - ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8)) - /* To be compatible with former implementations we for now restrict the number of bits for character classes to 16. When compatibility is not necessary anymore increase the number to 32. */ -#define char_class_t u_int16_t -#define CHAR_CLASS_TRANS SWAPU16 -#define char_class32_t u_int32_t -#define CHAR_CLASS32_TRANS SWAPU32 +#define char_class_t uint16_t +#define CHAR_CLASS_TRANS bswap_16 +#define char_class32_t uint32_t +#define CHAR_CLASS32_TRANS bswap_32 + + +/* Type to describe a transliteration action. We have a possibly + multiple character from-string and a set of multiple character + to-strings. All are 32bit values since this is what is used in + the gconv functions. */ +struct translit_to_t +{ + uint32_t *str; + + struct translit_to_t *next; +}; + +struct translit_t +{ + uint32_t *from; + + struct translit_to_t *to; + + struct translit_t *next; +}; /* The real definition of the struct for the LC_CTYPE locale. */ struct locale_ctype_t { - unsigned int *charnames; + uint32_t *charnames; size_t charnames_max; size_t charnames_act; - /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */ -#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1) + struct repertoire_t *repertoire; + + /* We will allow up to 8 * sizeof (uint32_t) character classes. */ +#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t)) size_t nr_charclass; const char *classnames[MAX_NR_CHARCLASS]; - unsigned long int current_class_mask; - unsigned int last_class_char; - u_int32_t *class_collection; + uint32_t last_class_char; + uint32_t class256_collection[256]; + uint32_t *class_collection; size_t class_collection_max; size_t class_collection_act; - unsigned long int class_done; + uint32_t class_done; + + struct charseq **mbdigits; + size_t mbdigits_act; + size_t mbdigits_max; + uint32_t *wcdigits; + size_t wcdigits_act; + size_t wcdigits_max; + + struct charseq *mboutdigits[10]; + uint32_t wcoutdigits[10]; + size_t outdigits_act; /* If the following number ever turns out to be too small simply increase it. But I doubt it will. --drepper@gnu */ #define MAX_NR_CHARMAP 16 const char *mapnames[MAX_NR_CHARMAP]; - u_int32_t *map_collection[MAX_NR_CHARMAP]; + uint32_t *map_collection[MAX_NR_CHARMAP]; + uint32_t map256_collection[2][256]; size_t map_collection_max[MAX_NR_CHARMAP]; size_t map_collection_act[MAX_NR_CHARMAP]; size_t map_collection_nr; size_t last_map_idx; - unsigned int from_map_char; - int toupper_done; - int tolower_done; + int tomap_done[MAX_NR_CHARMAP]; + + /* Transliteration information. */ + const char *translit_copy_locale; + const char *translit_copy_repertoire; + struct translit_t *translit; /* The arrays for the binary representation. */ - u_int32_t plane_size; - u_int32_t plane_cnt; + uint32_t plane_size; + uint32_t plane_cnt; char_class_t *ctype_b; char_class32_t *ctype32_b; - u_int32_t *names_el; - u_int32_t *names_eb; - u_int32_t **map_eb; - u_int32_t **map_el; - u_int32_t *class_name_ptr; - u_int32_t *map_name_ptr; + uint32_t *names_el; + uint32_t *names_eb; + uint32_t **map_eb; + uint32_t **map_el; + uint32_t *class_name_ptr; + uint32_t *map_name_ptr; unsigned char *width; - u_int32_t mb_cur_max; + uint32_t mb_cur_max; const char *codeset_name; + uint32_t translit_hash_size_eb; + uint32_t translit_hash_size_el; + uint32_t translit_hash_layers_eb; + uint32_t translit_hash_layers_el; + uint32_t *translit_from_idx_eb; + uint32_t *translit_from_idx_el; + uint32_t *translit_from_tbl_eb; + uint32_t *translit_from_tbl_el; + uint32_t *translit_to_idx_eb; + uint32_t *translit_to_idx_el; + uint32_t *translit_to_tbl_eb; + uint32_t *translit_to_tbl_el; + size_t translit_idx_size; + size_t translit_from_tbl_size; + size_t translit_to_tbl_size; + + struct obstack mem_pool; }; +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + + /* Prototypes for local functions. */ -static void ctype_class_newP (struct linereader *lr, - struct locale_ctype_t *ctype, const char *name); -static void ctype_map_newP (struct linereader *lr, - struct locale_ctype_t *ctype, - const char *name, struct charset_t *charset); -static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table, - size_t *max, size_t *act, unsigned int idx); +static void ctype_startup (struct linereader *lr, struct localedef_t *locale, + struct charmap_t *charmap, int ignore_content); +static void ctype_class_new (struct linereader *lr, + struct locale_ctype_t *ctype, const char *name); +static void ctype_map_new (struct linereader *lr, + struct locale_ctype_t *ctype, + const char *name, struct charmap_t *charmap); +static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table, + size_t *max, size_t *act, unsigned int idx); static void set_class_defaults (struct locale_ctype_t *ctype, - struct charset_t *charset); + struct charmap_t *charmap, + struct repertoire_t *repertoire); static void allocate_arrays (struct locale_ctype_t *ctype, - struct charset_t *charset); + struct charmap_t *charmap, + struct repertoire_t *repertoire); -void +static const char *longnames[] = +{ + "zero", "one", "two", "three", "four", + "five", "six", "seven", "eight", "nine" +}; +static const unsigned char digits[] = "0123456789"; + + +static void ctype_startup (struct linereader *lr, struct localedef_t *locale, - struct charset_t *charset) + struct charmap_t *charmap, int ignore_content) { unsigned int cnt; struct locale_ctype_t *ctype; - /* We have a definition for LC_CTYPE. */ - copy_posix.mask &= ~(1 << LC_CTYPE); - - /* It is important that we always use UCS1 encoding for strings now. */ - encoding_method = ENC_UCS1; - - /* Allocate the needed room. */ - locale->categories[LC_CTYPE].ctype = ctype = - (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t)); - - /* We have no names seen yet. */ - ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512; - ctype->charnames = - (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int)); - for (cnt = 0; cnt < 256; ++cnt) - ctype->charnames[cnt] = cnt; - ctype->charnames_act = 256; - - /* Fill character class information. */ - ctype->nr_charclass = 0; - ctype->current_class_mask = 0; - ctype->last_class_char = ILLEGAL_CHAR_VALUE; - /* The order of the following instructions determines the bit - positions! */ - ctype_class_newP (lr, ctype, "upper"); - ctype_class_newP (lr, ctype, "lower"); - ctype_class_newP (lr, ctype, "alpha"); - ctype_class_newP (lr, ctype, "digit"); - ctype_class_newP (lr, ctype, "xdigit"); - ctype_class_newP (lr, ctype, "space"); - ctype_class_newP (lr, ctype, "print"); - ctype_class_newP (lr, ctype, "graph"); - ctype_class_newP (lr, ctype, "blank"); - ctype_class_newP (lr, ctype, "cntrl"); - ctype_class_newP (lr, ctype, "punct"); - ctype_class_newP (lr, ctype, "alnum"); - - ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512; - ctype->class_collection - = (u_int32_t *) xmalloc (sizeof (unsigned long int) - * ctype->class_collection_max); - memset (ctype->class_collection, '\0', - sizeof (unsigned long int) * ctype->class_collection_max); - ctype->class_collection_act = 256; - - /* Fill character map information. */ - ctype->map_collection_nr = 0; - ctype->last_map_idx = MAX_NR_CHARMAP; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; - ctype_map_newP (lr, ctype, "toupper", charset); - ctype_map_newP (lr, ctype, "tolower", charset); - - /* Fill first 256 entries in `toupper' and `tolower' arrays. */ - for (cnt = 0; cnt < 256; ++cnt) + if (!ignore_content) { - ctype->map_collection[0][cnt] = cnt; - ctype->map_collection[1][cnt] = cnt; + /* Allocate the needed room. */ + locale->categories[LC_CTYPE].ctype = ctype = + (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t)); + + /* We have seen no names yet. */ + ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512; + ctype->charnames = + (unsigned int *) xmalloc (ctype->charnames_max + * sizeof (unsigned int)); + for (cnt = 0; cnt < 256; ++cnt) + ctype->charnames[cnt] = cnt; + ctype->charnames_act = 256; + + /* Fill character class information. */ + ctype->last_class_char = ILLEGAL_CHAR_VALUE; + /* The order of the following instructions determines the bit + positions! */ + ctype_class_new (lr, ctype, "upper"); + ctype_class_new (lr, ctype, "lower"); + ctype_class_new (lr, ctype, "alpha"); + ctype_class_new (lr, ctype, "digit"); + ctype_class_new (lr, ctype, "xdigit"); + ctype_class_new (lr, ctype, "space"); + ctype_class_new (lr, ctype, "print"); + ctype_class_new (lr, ctype, "graph"); + ctype_class_new (lr, ctype, "blank"); + ctype_class_new (lr, ctype, "cntrl"); + ctype_class_new (lr, ctype, "punct"); + ctype_class_new (lr, ctype, "alnum"); + /* The following are extensions from ISO 14652. */ + ctype_class_new (lr, ctype, "left_to_right"); + ctype_class_new (lr, ctype, "right_to_left"); + ctype_class_new (lr, ctype, "num_terminator"); + ctype_class_new (lr, ctype, "num_separator"); + ctype_class_new (lr, ctype, "segment_separator"); + ctype_class_new (lr, ctype, "block_separator"); + ctype_class_new (lr, ctype, "direction_control"); + ctype_class_new (lr, ctype, "sym_swap_layout"); + ctype_class_new (lr, ctype, "char_shape_selector"); + ctype_class_new (lr, ctype, "num_shape_selector"); + ctype_class_new (lr, ctype, "non_spacing"); + ctype_class_new (lr, ctype, "non_spacing_level3"); + ctype_class_new (lr, ctype, "normal_connect"); + ctype_class_new (lr, ctype, "r_connect"); + ctype_class_new (lr, ctype, "no_connect"); + ctype_class_new (lr, ctype, "no_connect-space"); + ctype_class_new (lr, ctype, "vowel_connect"); + + ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512; + ctype->class_collection + = (uint32_t *) xcalloc (sizeof (unsigned long int), + ctype->class_collection_max); + ctype->class_collection_act = 256; + + /* Fill character map information. */ + ctype->map_collection_nr = 0; + ctype->last_map_idx = MAX_NR_CHARMAP; + ctype_map_new (lr, ctype, "toupper", charmap); + ctype_map_new (lr, ctype, "tolower", charmap); + ctype_map_new (lr, ctype, "tosymmetric", charmap); + + /* Fill first 256 entries in `toXXX' arrays. */ + for (cnt = 0; cnt < 256; ++cnt) + { + ctype->map_collection[0][cnt] = cnt; + ctype->map_collection[1][cnt] = cnt; + ctype->map_collection[2][cnt] = cnt; + ctype->map256_collection[0][cnt] = cnt; + ctype->map256_collection[1][cnt] = cnt; + } + + obstack_init (&ctype->mem_pool); } } void -ctype_finish (struct localedef_t *locale, struct charset_t *charset) +ctype_finish (struct localedef_t *locale, struct charmap_t *charmap) { /* See POSIX.2, table 2-6 for the meaning of the following table. */ #define NCLASS 12 @@ -226,106 +317,138 @@ ctype_finish (struct localedef_t *locale, struct charset_t *charset) }; size_t cnt; int cls1, cls2; - unsigned int space_value; + uint32_t space_value; + struct charseq *space_seq; struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + int warned; /* Set default value for classes not specified. */ - set_class_defaults (ctype, charset); + set_class_defaults (ctype, charmap, ctype->repertoire); /* Check according to table. */ for (cnt = 0; cnt < ctype->class_collection_max; ++cnt) { - unsigned long int tmp; - - tmp = ctype->class_collection[cnt]; - if (tmp == 0) - continue; + uint32_t tmp = ctype->class_collection[cnt]; - for (cls1 = 0; cls1 < NCLASS; ++cls1) - if ((tmp & (1 << cls1)) != 0) - for (cls2 = 0; cls2 < NCLASS; ++cls2) - if (valid_table[cls1].allow[cls2] != '-') - { - int eq = (tmp & (1 << cls2)) != 0; - switch (valid_table[cls1].allow[cls2]) + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISwbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') { - case 'M': - if (!eq) + int eq = (tmp & _ISwbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) { - char buf[17]; - char *cp = buf; - unsigned int value; - - value = ctype->charnames[cnt]; - - if ((value & 0xff000000) != 0) - cp += sprintf (cp, "\\%o", (value >> 24) & 0xff); - if ((value & 0xffff0000) != 0) - cp += sprintf (cp, "\\%o", (value >> 16) & 0xff); - if ((value & 0xffffff00) != 0) - cp += sprintf (cp, "\\%o", (value >> 8) & 0xff); - sprintf (cp, "\\%o", value & 0xff); - - if (!be_quiet) - error (0, 0, _("\ -character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "", - buf, valid_table[cls1].name, - valid_table[cls2].name); + case 'M': + if (!eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must be in class `%s'"), + value > 0xffff ? 8 : 4, value, + valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'X': + if (eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must not be in class `%s'"), + value > 0xffff ? 8 : 4, value, + valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'D': + ctype->class_collection[cnt] |= _ISwbit (cls2); + break; + + default: + error (5, 0, _("internal error in %s, line %u"), + __FUNCTION__, __LINE__); } - break; + } + } + } + + for (cnt = 0; cnt < 256; ++cnt) + { + uint32_t tmp = ctype->class256_collection[cnt]; - case 'X': - if (eq) + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & _ISbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) { - char buf[17]; - char *cp = buf; - unsigned int value; - - value = ctype->charnames[cnt]; - - if ((value & 0xff000000) != 0) - cp += sprintf (cp, "\\%o", value >> 24); - if ((value & 0xffff0000) != 0) - cp += sprintf (cp, "\\%o", (value >> 16) & 0xff); - if ((value & 0xffffff00) != 0) - cp += sprintf (cp, "\\%o", (value >> 8) & 0xff); - sprintf (cp, "\\%o", value & 0xff); - - if (!be_quiet) - error (0, 0, _("\ -character %s'%s' in class `%s' must not be in class `%s'"), - value > 256 ? "L" : "", buf, - valid_table[cls1].name, - valid_table[cls2].name); + case 'M': + if (!eq) + { + char buf[17]; + + sprintf (buf, "\\%o", cnt); + + if (!be_quiet) + error (0, 0, _("\ +character '%s' in class `%s' must be in class `%s'"), + buf, valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'X': + if (eq) + { + char buf[17]; + + sprintf (buf, "\\%o", cnt); + + if (!be_quiet) + error (0, 0, _("\ +character '%s' in class `%s' must not be in class `%s'"), + buf, valid_table[cls1].name, + valid_table[cls2].name); + } + break; + + case 'D': + ctype->class256_collection[cnt] |= _ISbit (cls2); + break; + + default: + error (5, 0, _("internal error in %s, line %u"), + __FUNCTION__, __LINE__); } - break; - - case 'D': - ctype->class_collection[cnt] |= 1 << cls2; - break; - - default: - error (5, 0, _("internal error in %s, line %u"), - __FUNCTION__, __LINE__); - } - } + } + } } /* ... and now test as a special case. */ - space_value = charset_find_value (&charset->char_table, "SP", 2); - if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE) - space_value = charset_find_value (&charset->char_table, "space", 5); - if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE) + space_value = repertoire_find_value (ctype->repertoire, "SP", 2); + if (space_value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("character not defined in character map")); } else if (((cnt = BITPOS (tok_space), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_space)) == 0) + & BITw (tok_space)) == 0) || (cnt = BITPOS (tok_blank), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_blank)) == 0))) + & BITw (tok_blank)) == 0))) { if (!be_quiet) error (0, 0, _(" character not in class `%s'"), @@ -333,10 +456,10 @@ character %s'%s' in class `%s' must not be in class `%s'"), } else if (((cnt = BITPOS (tok_punct), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_punct)) != 0) + & BITw (tok_punct)) != 0) || (cnt = BITPOS (tok_graph), (ELEM (ctype, class_collection, , space_value) - & BIT (tok_graph)) + & BITw (tok_graph)) != 0))) { if (!be_quiet) @@ -344,24 +467,205 @@ character %s'%s' in class `%s' must not be in class `%s'"), valid_table[cnt].name); } else - ELEM (ctype, class_collection, , space_value) |= BIT (tok_print); + ELEM (ctype, class_collection, , space_value) |= BITw (tok_print); + + space_seq = charmap_find_value (charmap, "SP", 2); + if (space_seq == NULL || space_seq->nbytes != 1) + { + if (!be_quiet) + error (0, 0, _("character not defined in character map")); + } + else if (((cnt = BITPOS (tok_space), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_blank)) == 0))) + { + if (!be_quiet) + error (0, 0, _(" character not in class `%s'"), + valid_table[cnt].name); + } + else if (((cnt = BITPOS (tok_punct), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_graph)) != 0))) + { + if (!be_quiet) + error (0, 0, _(" character must not be in class `%s'"), + valid_table[cnt].name); + } + else + ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); /* Now that the tests are done make sure the name array contains all characters which are handled in the WIDTH section of the character set definition file. */ - if (charset->width_rules != NULL) - for (cnt = 0; cnt < charset->nwidth_rules; ++cnt) + if (charmap->width_rules != NULL) + for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) { +#if 0 size_t inner; - for (inner = charset->width_rules[cnt].from; - inner <= charset->width_rules[cnt].to; ++inner) + for (inner = charmap->width_rules[cnt].from; + inner <= charmap->width_rules[cnt].to; ++inner) (void) find_idx (ctype, NULL, NULL, NULL, inner); +#else + /* XXX Handle width. We must convert from the charseq to the + repertoire value */ + abort (); +#endif + } + + /* There must be a multiple of 10 digits. */ + if (ctype->mbdigits_act % 10 != 0) + { + assert (ctype->mbdigits_act == ctype->wcdigits_act); + ctype->wcdigits_act -= ctype->mbdigits_act % 10; + ctype->mbdigits_act -= ctype->mbdigits_act % 10; + error (0, 0, _("`digit' category has not entries in groups of ten")); + } + + /* Check the input digits. There must be a multiple of ten available. + In each group I could be that one or the other character is missing. + In this case the whole group must be removed. */ + cnt = 0; + while (cnt < ctype->mbdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->mbdigits[cnt + inner] == NULL) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. */ + memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->mbdigits[0]))); + ctype->mbdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->mbdigits_act == 0) + { + if (ctype->mbdigits_max == 0) + { + ctype->mbdigits = obstack_alloc (&charmap->mem_pool, + 10 * sizeof (struct charseq *)); + ctype->mbdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + digits + cnt, 1); + if (ctype->mbdigits[cnt] == NULL) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + if (ctype->mbdigits[cnt] == NULL) + { + /* Hum, this ain't good. */ + error (0, 0, _("\ +no input digits defined and none of the standard names in the charmap")); + + ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool, + sizeof (struct charseq) + 1); + + /* This is better than nothing. */ + ctype->mbdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mbdigits[cnt]->nbytes = 1; + } + } + } + + ctype->mbdigits_act = 10; + } + + /* Check the wide character input digits. There must be a multiple + of ten available. In each group I could be that one or the other + character is missing. In this case the whole group must be + removed. */ + cnt = 0; + while (cnt < ctype->wcdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. */ + memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->wcdigits[0]))); + ctype->wcdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->wcdigits_act == 0) + { + if (ctype->wcdigits_max == 0) + { + ctype->wcdigits = obstack_alloc (&charmap->mem_pool, + 10 * sizeof (uint32_t)); + ctype->wcdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + ctype->wcdigits[cnt] = L'0' + cnt; + + ctype->mbdigits_act = 10; + } + + /* Check the outdigits. */ + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->mboutdigits[cnt] == NULL) + { + static struct charseq replace[2]; + + if (!warned) + { + error (0, 0, _("\ +not all characters used in `outdigit' are available in the charmap")); + warned = 1; + } + + replace[0].nbytes = 1; + replace[0].bytes[0] = '?'; + replace[0].bytes[1] = '\0'; + ctype->mboutdigits[cnt] = &replace[0]; + } + + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->wcoutdigits[cnt] == 0) + { + if (!warned) + { + error (0, 0, _("\ +not all characters used in `outdigit' are available in the repertoire")); + warned = 1; + } + + ctype->wcoutdigits[cnt] = L'?'; } } void -ctype_output (struct localedef_t *locale, struct charset_t *charset, +ctype_output (struct localedef_t *locale, struct charmap_t *charmap, const char *output_path) { struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; @@ -370,23 +674,12 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, struct iovec iov[2 + nelems + ctype->nr_charclass + ctype->map_collection_nr]; struct locale_file data; - u_int32_t idx[nelems]; + uint32_t idx[nelems + 1]; size_t elem, cnt, offset, total; - - - if ((locale->binary & (1 << LC_CTYPE)) != 0) - { - iov[0].iov_base = ctype; - iov[0].iov_len = locale->len[LC_CTYPE]; - - write_locale_data (output_path, "LC_CTYPE", 1, iov); - - return; - } - + char *cp; /* Now prepare the output: Find the sizes of the table we can use. */ - allocate_arrays (ctype, charset); + allocate_arrays (ctype, charmap, ctype->repertoire); data.magic = LIMAGIC (LC_CTYPE); data.n = nelems; @@ -419,20 +712,20 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, CTYPE_DATA (_NL_CTYPE_TOUPPER_EB, ctype->map_eb[0], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOLOWER_EB, ctype->map_eb[1], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOUPPER_EL, ctype->map_el[0], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_TOLOWER_EL, ctype->map_el[1], (ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); CTYPE_DATA (_NL_CTYPE_CLASS32, ctype->ctype32_b, @@ -441,15 +734,88 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, CTYPE_DATA (_NL_CTYPE_NAMES_EB, ctype->names_eb, (ctype->plane_size * ctype->plane_cnt - * sizeof (u_int32_t))); + * sizeof (uint32_t))); CTYPE_DATA (_NL_CTYPE_NAMES_EL, ctype->names_el, (ctype->plane_size * ctype->plane_cnt - * sizeof (u_int32_t))); - - CTYPE_DATA (_NL_CTYPE_HASH_SIZE, - &ctype->plane_size, sizeof (u_int32_t)); - CTYPE_DATA (_NL_CTYPE_HASH_LAYERS, - &ctype->plane_cnt, sizeof (u_int32_t)); + * sizeof (uint32_t))); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EB, + &ctype->translit_hash_size_eb, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EL, + &ctype->translit_hash_size_el, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EB, + &ctype->translit_hash_layers_eb, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EL, + &ctype->translit_hash_layers_el, sizeof (uint32_t)); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EB, + ctype->translit_from_idx_eb, + ctype->translit_idx_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EL, + ctype->translit_from_idx_el, + ctype->translit_idx_size); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EB, + ctype->translit_from_tbl_eb, + ctype->translit_from_tbl_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EL, + ctype->translit_from_tbl_el, + ctype->translit_from_tbl_size); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EB, + ctype->translit_to_idx_eb, + ctype->translit_idx_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EL, + ctype->translit_to_idx_el, + ctype->translit_idx_size); + + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EB, + ctype->translit_to_tbl_eb, ctype->translit_to_tbl_size); + CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EL, + ctype->translit_to_tbl_el, ctype->translit_to_tbl_size); + +#if __BYTE_ORDER == __BIG_ENDIAN + CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EB, + &ctype->plane_size, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EB, + &ctype->plane_cnt, sizeof (uint32_t)); +#else + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EB): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_size); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EB): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_cnt); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; +#endif +#if __BYTE_ORDER == __BIG_ENDIAN + CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EL, + &ctype->plane_size, sizeof (uint32_t)); + CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EL, + &ctype->plane_cnt, sizeof (uint32_t)); +#else + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EL): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_size); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; + case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EL): + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->plane_cnt); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; +#endif case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES): /* The class name array. */ @@ -466,8 +832,7 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4)); total += 1 + (4 - ((total + 1) % 4)); - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + total; + idx[elem + 1] = idx[elem] + total; break; case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES): @@ -485,15 +850,14 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4)); total += 1 + (4 - ((total + 1) % 4)); - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + total; + idx[elem + 1] = idx[elem] + total; break; CTYPE_DATA (_NL_CTYPE_WIDTH, ctype->width, ctype->plane_size * ctype->plane_cnt); CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX, - &ctype->mb_cur_max, sizeof (u_int32_t)); + &ctype->mb_cur_max, sizeof (uint32_t)); case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME): total = strlen (ctype->codeset_name) + 1; @@ -508,8 +872,127 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, total = (total + 3) & ~3; } iov[2 + elem + offset].iov_len = total; - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB): + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL): + iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB) + && __BYTE_ORDER == __BIG_ENDIAN) + || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL) + && __BYTE_ORDER == __LITTLE_ENDIAN)) + *(uint32_t *) iov[2 + elem + offset].iov_base = + ctype->mbdigits_act / 10; + else + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->mbdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB): + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL): + iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB) + && __BYTE_ORDER == __BIG_ENDIAN) + || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL) + && __BYTE_ORDER == __LITTLE_ENDIAN)) + *(uint32_t *) iov[2 + elem + offset].iov_base = + ctype->wcdigits_act / 10; + else + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->wcdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB): + /* Compute the length of all possible characters. For INDIGITS + there might be more than one. We simply concatenate all of + them with a NUL byte following. The NUL byte wouldn't be + necessary but it makes it easier for the user. */ + total = 0; + for (cnt = elem - _NL_CTYPE_INDIGITS0_MB; + cnt < ctype->mbdigits_act; cnt += 10) + total += ctype->mbdigits[cnt]->nbytes + 1; + iov[2 + elem + offset].iov_base = (char *) alloca (total); + iov[2 + elem + offset].iov_len = total; + + cp = iov[2 + elem + offset].iov_base; + for (cnt = elem - _NL_CTYPE_INDIGITS0_MB; + cnt < ctype->mbdigits_act; cnt += 10) + { + cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes, + ctype->mbdigits[cnt]->nbytes); + *cp++ = '\0'; + } + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB): + /* Compute the length of all possible characters. For INDIGITS + there might be more than one. We simply concatenate all of + them with a NUL byte following. The NUL byte wouldn't be + necessary but it makes it easier for the user. */ + cnt = elem - _NL_CTYPE_OUTDIGIT0_MB; + total = ctype->mboutdigits[cnt]->nbytes + 1; + iov[2 + elem + offset].iov_base = (char *) alloca (total); + iov[2 + elem + offset].iov_len = total; + + *(char *) mempcpy (iov[2 + elem + offset].iov_base, + ctype->mbdigits[cnt]->bytes, + ctype->mbdigits[cnt]->nbytes) = '\0'; + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EB): + total = ctype->wcdigits_act / 10; + + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (total * sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = total * sizeof (uint32_t); + + for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EB; + cnt < ctype->wcdigits_act; cnt += 10) + ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10] + = (__BYTE_ORDER == __LITTLE_ENDIAN + ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EL): + total = ctype->wcdigits_act / 10; + + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (total * sizeof (uint32_t)); + iov[2 + elem + offset].iov_len = total * sizeof (uint32_t); + + for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EL; + cnt < ctype->wcdigits_act; cnt += 10) + ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10] + = (__BYTE_ORDER == __BIG_ENDIAN + ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]); + break; + +#if __BYTE_ORDER == __BIG_ENDIAN + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB; +#else + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL; +#endif + iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt]; + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + break; + +#if __BYTE_ORDER == __LITTLE_ENDIAN + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB; +#else + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL): + cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL; +#endif + iov[2 + elem + offset].iov_base = + (uint32_t *) alloca (sizeof (uint32_t)); + *(uint32_t *) iov[2 + elem + offset].iov_base = + bswap_32 (ctype->wcoutdigits[cnt]); + iov[2 + elem + offset].iov_len = sizeof (uint32_t); break; default: @@ -527,10 +1010,9 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, iov[2 + elem + offset].iov_len = ((ctype->plane_size * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + * sizeof (uint32_t)); - if (elem + 1 < nelems) - idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; } } @@ -541,596 +1023,1575 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset, } -/* Character class handling. */ -void -ctype_class_new (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) +/* Local functions. */ +static void +ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name) { - ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype, - code->val.str.start); -} + size_t cnt; + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], name) == 0) + break; -int -ctype_is_charclass (struct linereader *lr, struct localedef_t *locale, - const char *name) -{ - size_t cnt; + if (cnt < ctype->nr_charclass) + { + lr_error (lr, _("character class `%s' already defined"), name); + return; + } - for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt) - if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt]) - == 0) - return 1; + if (ctype->nr_charclass == MAX_NR_CHARCLASS) + /* Exit code 2 is prescribed in P1003.2b. */ + error (2, 0, _("\ +implementation limit: no more than %d character classes allowed"), + MAX_NR_CHARCLASS); - return 0; + ctype->classnames[ctype->nr_charclass++] = name; } -void -ctype_class_start (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, const char *str, - struct charset_t *charset) +static void +ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name, struct charmap_t *charmap) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + size_t max_chars = 0; size_t cnt; - switch (tok) + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) { - case tok_upper: - str = "upper"; - break; - case tok_lower: - str = "lower"; - break; - case tok_alpha: - str = "alpha"; - break; - case tok_digit: - str = "digit"; - break; - case tok_xdigit: - str = "xdigit"; - break; - case tok_space: - str = "space"; - break; - case tok_print: - str = "print"; - break; - case tok_graph: - str = "graph"; - break; - case tok_blank: - str = "blank"; - break; - case tok_cntrl: - str = "cntrl"; - break; - case tok_punct: - str = "punct"; - break; - case tok_alnum: - str = "alnum"; - break; - case tok_ident: - break; - default: - assert (! "illegal token as class name: should not happen"); + if (strcmp (ctype->mapnames[cnt], name) == 0) + break; + + if (max_chars < ctype->map_collection_max[cnt]) + max_chars = ctype->map_collection_max[cnt]; } - for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) - if (strcmp (str, ctype->classnames[cnt]) == 0) - break; + if (cnt < ctype->map_collection_nr) + { + lr_error (lr, _("character map `%s' already defined"), name); + return; + } - if (cnt >= ctype->nr_charclass) - assert (! "unknown class in class definition: should not happen"); + if (ctype->map_collection_nr == MAX_NR_CHARMAP) + /* Exit code 2 is prescribed in P1003.2b. */ + error (2, 0, _("\ +implementation limit: no more than %d character maps allowed"), + MAX_NR_CHARMAP); - ctype->class_done |= BIT (tok); + ctype->mapnames[cnt] = name; + + if (max_chars == 0) + ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512; + else + ctype->map_collection_max[cnt] = max_chars; + + ctype->map_collection[cnt] = (uint32_t *) + xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]); + memset (ctype->map_collection[cnt], '\0', + sizeof (uint32_t) * ctype->map_collection_max[cnt]); + ctype->map_collection_act[cnt] = 256; - ctype->current_class_mask = 1 << cnt; - ctype->last_class_char = ILLEGAL_CHAR_VALUE; + ++ctype->map_collection_nr; } -void -ctype_class_from (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This + is possible if we only want ot extend the name array. */ +static uint32_t * +find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, + size_t *act, uint32_t idx) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value; - - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + size_t cnt; - ctype->last_class_char = value; + if (idx < 256) + return table == NULL ? NULL : &(*table)[idx]; - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - return; + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) + break; - *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max, - &ctype->class_collection_act, value) - |= ctype->current_class_mask; -} + /* We have to distinguish two cases: the name is found or not. */ + if (cnt == ctype->charnames_act) + { + /* Extend the name array. */ + if (ctype->charnames_act == ctype->charnames_max) + { + ctype->charnames_max *= 2; + ctype->charnames = (unsigned int *) + xrealloc (ctype->charnames, + sizeof (unsigned int) * ctype->charnames_max); + } + ctype->charnames[ctype->charnames_act++] = idx; + } + if (table == NULL) + /* We have done everything we are asked to do. */ + return NULL; -void -ctype_class_to (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value, cnt; + if (cnt >= *act) + { + if (cnt >= *max) + { + size_t old_max = *max; + do + *max *= 2; + while (*max <= cnt); - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + *table = + (uint32_t *) xrealloc (*table, *max * sizeof (unsigned long int)); + memset (&(*table)[old_max], '\0', + (*max - old_max) * sizeof (uint32_t)); + } - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE - && (wchar_t) value != ILLEGAL_CHAR_VALUE) - for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt) - *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max, - &ctype->class_collection_act, cnt) - |= ctype->current_class_mask; + *act = cnt; + } - ctype->last_class_char = ILLEGAL_CHAR_VALUE; + return &(*table)[cnt]; } -void -ctype_class_end (struct linereader *lr, struct localedef_t *locale) +static int +get_character (struct token *now, struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct charseq **seqp, uint32_t *wchp) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - - /* We have no special actions to perform here. */ - ctype->current_class_mask = 0; - ctype->last_class_char = ILLEGAL_CHAR_VALUE; -} + if (now->tok == tok_bsymbol) + { + /* This will hopefully be the normal case. */ + *wchp = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + *seqp = charmap_find_value (charmap, now->val.str.startmb, + now->val.str.lenmb); + } + else if (now->tok == tok_ucs4) + { + *seqp = repertoire_find_seq (repertoire, now->val.ucs4); + if (*seqp == NULL) + { + /* Compute the value in the charmap from the UCS value. */ + const char *symbol = repertoire_find_symbol (repertoire, + now->val.ucs4); -/* Character map handling. */ -void -ctype_map_new (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, struct token *code, - struct charset_t *charset) -{ - ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype, - code->val.str.start, charset); -} + if (symbol == NULL) + *seqp = NULL; + else + *seqp = charmap_find_value (charmap, symbol, strlen (symbol)); + if (*seqp == NULL) + { + /* Insert a negative entry. */ + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4); + *newp = now->val.ucs4; + + insert_entry (&repertoire->seq_table, newp, 4, + (void *) &negative); + } + else + (*seqp)->ucs4 = now->val.ucs4; + } + else if ((*seqp)->ucs4 != now->val.ucs4) + *seqp = NULL; -int -ctype_is_charconv (struct linereader *lr, struct localedef_t *locale, - const char *name) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - size_t cnt; + *wchp = now->val.ucs4; + } + else if (now->tok == tok_charcode) + { + /* We must map from the byte code to UCS4. */ + *seqp = charmap_find_symbol (charmap, now->val.str.startmb, + now->val.str.lenmb); - for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) - if (strcmp (name, ctype->mapnames[cnt]) == 0) - return 1; + if (*seqp == NULL) + *wchp = ILLEGAL_CHAR_VALUE; + else + { + if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE) + (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name, + strlen ((*seqp)->name)); + *wchp = (*seqp)->ucs4; + } + } + else + return 1; return 0; } -void -ctype_map_start (struct linereader *lr, struct localedef_t *locale, - enum token_t tok, const char *name, struct charset_t *charset) +/* Ellipsis like in `..' or `....'. */ +static void +charclass_symbolic_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, + const char *last_str, + unsigned long int class256_bit, + unsigned long int class_bit, int base, + int ignore_content, int handle_digits) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - size_t cnt; + const char *nowstr = now->val.str.startmb; + char tmp[now->val.str.lenmb + 1]; + const char *cp; + char *endp; + unsigned long int from; + unsigned long int to; - switch (tok) + /* We have to compute the ellipsis values using the symbolic names. */ + assert (last_str != NULL); + + if (strlen (last_str) != now->val.str.lenmb) { - case tok_toupper: - ctype->toupper_done = 1; - name = "toupper"; - break; - case tok_tolower: - ctype->tolower_done = 1; - name = "tolower"; - break; - case tok_ident: - break; - default: - assert (! "unknown token in category `LC_CTYPE' should not happen"); + invalid_range: + lr_error (ldfile, + _("`%s' and `%s' are no valid names for symbolic range"), + last_str, nowstr); + return; } - for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) - if (strcmp (name, ctype->mapnames[cnt]) == 0) - break; + if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0) + /* Nothing to do, the names are the same. */ + return; - if (cnt == ctype->map_collection_nr) - assert (! "unknown token in category `LC_CTYPE' should not happen"); + for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp) + ; - ctype->last_map_idx = cnt; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; -} + errno = 0; + from = strtoul (cp, &endp, base); + if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0') + goto invalid_range; + to = strtoul (nowstr + (cp - last_str), &endp, base); + if ((to == UINT_MAX && errno == ERANGE) || *endp != '\0' || from >= to) + goto invalid_range; -void -ctype_map_from (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value; + /* OK, we have a range FROM - TO. Now we can create the symbolic names. */ + if (!ignore_content) + { + now->val.str.startmb = tmp; + while (++from <= to) + { + struct charseq *seq; + uint32_t wch; - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); + sprintf (tmp, (base == 10 ? "%.*s%0*d" : "%.*s%0*X"), cp - last_str, + last_str, now->val.str.lenmb - (cp - last_str), from); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - return; + get_character (now, charmap, repertoire, &seq, &wch); + + if (seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; - assert (ctype->last_map_idx < ctype->map_collection_nr); + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; - ctype->from_map_char = value; + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + } } -void -ctype_map_to (struct linereader *lr, struct localedef_t *locale, - struct token *code, struct charset_t *charset) +/* Ellipsis like in `..'. */ +static void +charclass_ucs4_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, uint32_t last_wch, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits) { - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; - unsigned int value; - - value = charset_find_value (&charset->char_table, code->val.str.start, - code->val.str.len); - - if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE - || (wchar_t) value == ILLEGAL_CHAR_VALUE) + if (last_wch > now->val.ucs4) { - /* In the LC_CTYPE category it is no error when a character is - not found. This has to be ignored silently. */ - ctype->from_map_char = ILLEGAL_CHAR_VALUE; + lr_error (ldfile, _("\ +to-value of range is smaller than from-value "), + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4, + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch); return; } - *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx], - &ctype->map_collection_max[ctype->last_map_idx], - &ctype->map_collection_act[ctype->last_map_idx], - ctype->from_map_char) = value; + if (!ignore_content) + while (++last_wch <= now->val.ucs4) + { + /* We have to find out whether there is a byte sequence corresponding + to this UCS4 value. */ + struct charseq *seq = repertoire_find_seq (repertoire, last_wch); - ctype->from_map_char = ILLEGAL_CHAR_VALUE; -} + /* If this is the first time we look for this sequence create a new + entry. */ + if (seq == NULL) + { + /* Find the symbolic name for this UCS4 value. */ + const char *symbol = repertoire_find_symbol (repertoire, last_wch); + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4); + *newp = last_wch; + if (symbol != NULL) + /* We have a name, now search the multibyte value. */ + seq = charmap_find_value (charmap, symbol, strlen (symbol)); -void -ctype_map_end (struct linereader *lr, struct localedef_t *locale) -{ - struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + if (seq == NULL) + { + /* We have to create a fake entry. */ + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + seq = (struct charseq *) &negative; + } + else + seq->ucs4 = last_wch; + + insert_entry (&repertoire->seq_table, newp, 4, seq); + } + + /* We have a name, now search the multibyte value. */ + if (seq->ucs4 == last_wch && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[(size_t) seq->bytes[0]] + |= class256_bit; + + /* And of course we have the UCS4 position. */ + if (class_bit != 0 && class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, last_wch) |= class_bit; + + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcdigits[ctype->wcdigits_act++] = last_wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } - ctype->last_map_idx = MAX_NR_CHARMAP; - ctype->from_map_char = ILLEGAL_CHAR_VALUE; + ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcoutdigits[ctype->outdigits_act] = last_wch; + ++ctype->outdigits_act; + } + } } -/* Local functions. */ +/* Ellipsis as in `/xea/x12.../xea/x34'. */ static void -ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype, - const char *name) +charclass_charcode_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, char *last_charcode, + uint32_t last_charcode_len, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits) { - size_t cnt; - - for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) - if (strcmp (ctype->classnames[cnt], name) == 0) - break; + /* First check whether the to-value is larger. */ + if (now->val.charcode.nbytes != last_charcode_len) + { + lr_error (ldfile, _("\ +start end end character sequence of range must have the same length")); + return; + } - if (cnt < ctype->nr_charclass) + if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0) { - lr_error (lr, _("character class `%s' already defined"), name); + lr_error (ldfile, _("\ +to-value character sequence is smaller than from-value sequence")); return; } - if (ctype->nr_charclass == MAX_NR_CHARCLASS) - /* Exit code 2 is prescribed in P1003.2b. */ - error (2, 0, _("\ -implementation limit: no more than %d character classes allowed"), - MAX_NR_CHARCLASS); + if (!ignore_content) + { + do + { + /* Increment the byte sequence value. */ + struct charseq *seq; + uint32_t wch; + int i; + + for (i = last_charcode_len - 1; i >= 0; --i) + if (++last_charcode[i] != 0) + break; + + if (last_charcode_len == 1) + /* Of course we have the charcode value. */ + ctype->class256_collection[(size_t) last_charcode[0]] + |= class256_bit; + + /* Find the symbolic name. */ + seq = charmap_find_symbol (charmap, last_charcode, + last_charcode_len); + if (seq != NULL) + { + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + wch = seq->ucs4; + + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + } + else + wch = ILLEGAL_CHAR_VALUE; - ctype->classnames[ctype->nr_charclass++] = name; + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + struct charseq *seq; + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + while (memcmp (last_charcode, now->val.charcode.bytes, + last_charcode_len) != 0); + } } -static void -ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype, - const char *name, struct charset_t *charset) +/* Read one transliteration entry. */ +static uint32_t * +read_widestring (struct linereader *ldfile, struct token *now, + struct charmap_t *charmap, struct repertoire_t *repertoire) { - size_t max_chars = 0; - size_t cnt; + uint32_t *wstr; - for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + if (now->tok == tok_default_missing) + /* The special name "" will denote this case. */ + wstr = (uint32_t *) L""; + else if (now->tok == tok_bsymbol) { - if (strcmp (ctype->mapnames[cnt], name) == 0) - break; - - if (max_chars < ctype->map_collection_max[cnt]) - max_chars = ctype->map_collection_max[cnt]; + /* Get the value from the repertoire. */ + wstr = xmalloc (2 * sizeof (uint32_t)); + wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + if (wstr[0] == ILLEGAL_CHAR_VALUE) + /* We cannot proceed, we don't know the UCS4 value. */ + return NULL; + + wstr[1] = 0; } - - if (cnt < ctype->map_collection_nr) + else if (now->tok == tok_ucs4) { - lr_error (lr, _("character map `%s' already defined"), name); - return; + wstr = xmalloc (2 * sizeof (uint32_t)); + wstr[0] = now->val.ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_charcode) + { + /* Argh, we have to convert to the symbol name first and then to the + UCS4 value. */ + struct charseq *seq = charmap_find_symbol (charmap, + now->val.str.startmb, + now->val.str.lenmb); + if (seq == NULL) + /* Cannot find the UCS4 value. */ + return NULL; + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + if (seq->ucs4 == ILLEGAL_CHAR_VALUE) + /* We cannot proceed, we don't know the UCS4 value. */ + return NULL; + + wstr = xmalloc (2 * sizeof (uint32_t)); + wstr[0] = seq->ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_string) + { + wstr = now->val.str.startwc; + if (wstr[0] == 0) + return NULL; + } + else + { + if (now->tok != tok_eol && now->tok != tok_eof) + lr_ignore_rest (ldfile, 0); + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + return (uint32_t *) -1l; } - if (ctype->map_collection_nr == MAX_NR_CHARMAP) - /* Exit code 2 is prescribed in P1003.2b. */ - error (2, 0, _("\ -implementation limit: no more than %d character maps allowed"), - MAX_NR_CHARMAP); + return wstr; +} - ctype->mapnames[cnt] = name; - if (max_chars == 0) - ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512; - else - ctype->map_collection_max[cnt] = max_chars; +static void +read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype, + struct token *now, struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire); + struct translit_t *result; + struct translit_to_t **top; + struct obstack *ob = &ctype->mem_pool; + int first; + int ignore; + + if (from_wstr == NULL) + /* There is no valid from string. */ + return; - ctype->map_collection[cnt] = (u_int32_t *) - xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]); - memset (ctype->map_collection[cnt], '\0', - sizeof (u_int32_t) * ctype->map_collection_max[cnt]); - ctype->map_collection_act[cnt] = 256; + result = (struct translit_t *) obstack_alloc (ob, + sizeof (struct translit_t)); + result->from = from_wstr; + result->next = NULL; + result->to = NULL; + top = &result->to; + first = 1; + ignore = 0; + + while (1) + { + uint32_t *to_wstr; + + /* Next we have one or more transliterations. They are + separated by semicolons. */ + now = lr_token (ldfile, charmap, repertoire); + + if (!first && (now->tok == tok_semicolon || now->tok == tok_eol)) + { + /* One string read. */ + const uint32_t zero = 0; + + if (!ignore) + { + obstack_grow (ob, &zero, 4); + to_wstr = obstack_finish (ob); + + *top = obstack_alloc (ob, sizeof (struct translit_to_t)); + (*top)->str = to_wstr; + (*top)->next = NULL; + } + + if (now->tok == tok_eol) + { + result->next = ctype->translit; + ctype->translit = result; + return; + } + + if (!ignore) + top = &(*top)->next; + ignore = 0; + } + else + { + to_wstr = read_widestring (ldfile, now, charmap, repertoire); + if (to_wstr == (uint32_t *) -1l) + { + /* An error occurred. */ + obstack_free (ob, result); + return; + } + + if (to_wstr == NULL) + ignore = 1; + else + /* This value is usable. */ + obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4); - ++ctype->map_collection_nr; + first = 0; + } + } } -/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This - is possible if we only want to extend the name array. */ -static u_int32_t * -find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max, - size_t *act, unsigned int idx) +/* The parser for the LC_CTYPE section of the locale definition. */ +void +ctype_read (struct linereader *ldfile, struct localedef_t *result, + struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) { + struct repertoire_t *repertoire = NULL; + struct locale_ctype_t *ctype; + struct token *now; + enum token_t nowtok; size_t cnt; + struct charseq *last_seq; + uint32_t last_wch = 0; + enum token_t last_token; + enum token_t ellipsis_token; + char last_charcode[16]; + size_t last_charcode_len = 0; + const char *last_str = NULL; + int mapidx; - if (idx < 256) - return table == NULL ? NULL : &(*table)[idx]; + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); - for (cnt = 256; cnt < ctype->charnames_act; ++cnt) - if (ctype->charnames[cnt] == idx) - break; + /* The rest of the line containing `LC_CTYPE' must be free. */ + lr_ignore_rest (ldfile, 1); - /* We have to distinguish two cases: the name is found or not. */ - if (cnt == ctype->charnames_act) + + do { - /* Extend the name array. */ - if (ctype->charnames_act == ctype->charnames_max) - { - ctype->charnames_max *= 2; - ctype->charnames = (unsigned int *) - xrealloc (ctype->charnames, - sizeof (unsigned int) * ctype->charnames_max); - } - ctype->charnames[ctype->charnames_act++] = idx; + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } + while (nowtok == tok_eol); - if (table == NULL) - /* We have done everything we are asked to do. */ - return NULL; + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire, tok_lc_ctype, LC_CTYPE, + "LC_CTYPE", ignore_content); + return; + } - if (cnt >= *act) + /* Prepare the data structures. */ + ctype_startup (ldfile, result, charmap, ignore_content); + ctype = result->categories[LC_CTYPE].ctype; + + /* Remember the repertoire we use. */ + if (!ignore_content) + ctype->repertoire = repertoire; + + while (1) { - if (cnt >= *max) + unsigned long int class_bit = 0; + unsigned long int class256_bit = 0; + int handle_digits = 0; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) { - size_t old_max = *max; - do - *max *= 2; - while (*max <= cnt); + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; + continue; + } - *table = - (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int)); - memset (&(*table)[old_max], '\0', - (*max - old_max) * sizeof (u_int32_t)); + switch (nowtok) + { + case tok_class: + /* We simply forget the `class' keyword and use the following + operand to determine the bit. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_ident || now->tok == tok_string) + { + /* Must be one of the predefined class names. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0) + break; + if (cnt >= ctype->nr_charclass) + { + if (now->val.str.lenmb == 8 + && memcmp ("special1", now->val.str.startmb, 8) == 0) + class_bit = _ISwspecial1; + else if (now->val.str.lenmb == 8 + && memcmp ("special2", now->val.str.startmb, 8) == 0) + class_bit = _ISwspecial2; + else if (now->val.str.lenmb == 8 + && memcmp ("special3", now->val.str.startmb, 8) == 0) + class_bit = _ISwspecial3; + else + { + lr_error (ldfile, _("\ +unknown character class `%s' in category `LC_CTYPE'"), + now->val.str.startmb); + free (now->val.str.startmb); + + lr_ignore_rest (ldfile, 0); + continue; + } + } + else + class_bit = _ISwbit (cnt); + + free (now->val.str.startmb); + } + else if (now->tok == tok_digit) + goto handle_tok_digit; + else if (now->tok < tok_upper || now->tok > tok_blank) + goto err_label; + else + { + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + } + + /* The next character must be a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto err_label; + goto read_charclass; + + case tok_upper: + case tok_lower: + case tok_alpha: + case tok_alnum: + case tok_space: + case tok_cntrl: + case tok_punct: + case tok_graph: + case tok_print: + case tok_xdigit: + case tok_blank: + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + handle_digits = 0; + read_charclass: + ctype->class_done |= class_bit; + last_token = tok_none; + ellipsis_token = tok_none; + now = lr_token (ldfile, charmap, NULL); + while (now->tok != tok_eol && now->tok != tok_eof) + { + uint32_t wch; + struct charseq *seq; + + if (ellipsis_token == tok_none) + { + if (get_character (now, charmap, repertoire, &seq, &wch)) + goto err_label; + + if (!ignore_content && seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte + sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; + + if (!ignore_content && wch != ILLEGAL_CHAR_VALUE + && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + + last_token = now->tok; + last_str = now->val.str.startmb; + last_seq = seq; + last_wch = wch; + memcpy (last_charcode, now->val.charcode.bytes, 16); + last_charcode_len = now->val.charcode.nbytes; + + if (!ignore_content && handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (!ignore_content && handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + goto err_label; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + else + { + /* Now it gets complicated. We have to resolve the + ellipsis problem. First we must distinguish between + the different kind of ellipsis and this must match the + tokens we have seen. */ + assert (last_token != tok_none); + + if (last_token != now->tok) + { + lr_error (ldfile, _("\ +ellipsis range must be marked by two operands of same type")); + lr_ignore_rest (ldfile, 0); + break; + } + + if (last_token == tok_bsymbol) + { + if (ellipsis_token == tok_ellipsis3) + lr_error (ldfile, _("with symbolic name range values \ +the absolute ellipsis `...' must not be used")); + + charclass_symbolic_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_str, + class256_bit, class_bit, + (ellipsis_token + == tok_ellipsis4 + ? 10 : 16), + ignore_content, + handle_digits); + } + else if (last_token == tok_ucs4) + { + if (ellipsis_token != tok_ellipsis2) + lr_error (ldfile, _("\ +with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); + + charclass_ucs4_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_wch, + class256_bit, class_bit, + ignore_content, handle_digits); + } + else + { + assert (last_token == tok_charcode); + + if (ellipsis_token != tok_ellipsis3) + lr_error (ldfile, _("\ +with character code range values one must use the absolute ellipsis `...'")); + + charclass_charcode_ellipsis (ldfile, ctype, charmap, + repertoire, now, + last_charcode, + last_charcode_len, + class256_bit, class_bit, + ignore_content, + handle_digits); + } + + /* Now we have used the last value. */ + last_token = tok_none; + } + + /* Next we expect a semicolon or the end of the line. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eol || now->tok == tok_eof) + break; + + if (last_token != tok_none + && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4) + { + ellipsis_token = now->tok; + now = lr_token (ldfile, charmap, NULL); + continue; + } + + if (now->tok != tok_semicolon) + goto err_label; + + /* And get the next character. */ + now = lr_token (ldfile, charmap, NULL); + + ellipsis_token = tok_none; + } + break; + + case tok_digit: + handle_tok_digit: + class_bit = _ISwdigit; + class256_bit = _ISdigit; + handle_digits = 1; + goto read_charclass; + + case tok_outdigit: + if (ctype->outdigits_act != 0) + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), + "LC_CTYPE", "outdigit"); + class_bit = 0; + class256_bit = 0; + handle_digits = 2; + goto read_charclass; + + case tok_toupper: + mapidx = 0; + goto read_mapping; + + case tok_tolower: + mapidx = 1; + goto read_mapping; + + case tok_map: + /* We simply forget the `map' keyword and use the following + operand to determine the mapping. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_ident || now->tok == tok_string) + { + size_t cnt; + + for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) + break; + + if (cnt < ctype->map_collection_nr) + mapidx = cnt; + else + { + lr_error (ldfile, _("unknown map `%s'"), + now->val.str.startmb); + lr_ignore_rest (ldfile, 0); + break; + } + } + else if (now->tok < tok_toupper || now->tok > tok_tolower) + goto err_label; + else + mapidx = now->tok - tok_toupper; + + now = lr_token (ldfile, charmap, NULL); + /* This better should be a semicolon. */ + if (now->tok != tok_semicolon) + goto err_label; + + read_mapping: + /* Test whether this mapping was already defined. */ + if (ctype->tomap_done[mapidx]) + { + lr_error (ldfile, _("duplicated definition for mapping `%s'"), + ctype->mapnames[mapidx]); + lr_ignore_rest (ldfile, 0); + break; + } + ctype->tomap_done[mapidx] = 1; + + now = lr_token (ldfile, charmap, NULL); + while (now->tok != tok_eol && now->tok != tok_eof) + { + struct charseq *from_seq; + uint32_t from_wch; + struct charseq *to_seq; + uint32_t to_wch; + + /* Every pair starts with an opening brace. */ + if (now->tok != tok_open_brace) + goto err_label; + + /* Next comes the from-value. */ + now = lr_token (ldfile, charmap, NULL); + if (get_character (now, charmap, repertoire, &from_seq, + &from_wch) != 0) + goto err_label; + + /* The next is a comma. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_comma) + goto err_label; + + /* And the other value. */ + now = lr_token (ldfile, charmap, NULL); + if (get_character (now, charmap, repertoire, &to_seq, + &to_wch) != 0) + goto err_label; + + /* And the last thing is the closing brace. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_close_brace) + goto err_label; + + if (!ignore_content) + { + if (mapidx < 2 && from_seq != NULL && to_seq != NULL + && from_seq->nbytes == 1 && to_seq->nbytes == 1) + /* We can use this value. */ + ctype->map256_collection[mapidx][from_seq->bytes[0]] + = to_seq->bytes[0]; + + if (from_wch != ILLEGAL_CHAR_VALUE + && to_wch != ILLEGAL_CHAR_VALUE) + /* Both correct values. */ + *find_idx (ctype, &ctype->map_collection[mapidx], + &ctype->map_collection_max[mapidx], + &ctype->map_collection_act[mapidx], + from_wch) = to_wch; + } + + /* Now comes a semicolon or the end of the line/file. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_semicolon) + now = lr_token (ldfile, charmap, NULL); + } + break; + + case tok_translit_start: + /* The rest of the line better should be empty. */ + lr_ignore_rest (ldfile, 1); + + /* We count here the number of allocated entries in the `translit' + array. */ + cnt = 0; + + /* We proceed until we see the `translit_end' token. */ + while (now = lr_token (ldfile, charmap, repertoire), + now->tok != tok_translit_end && now->tok != tok_eof) + { + if (now->tok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (now->tok == tok_translit_end) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_include) + { + /* We have to include locale. */ + const char *locale_name; + const char *repertoire_name; + + now = lr_token (ldfile, charmap, NULL); + /* This should be a string or an identifier. In any + case something to name a locale. */ + if (now->tok != tok_string && now->tok != tok_ident) + { + translit_syntax: + lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + continue; + } + locale_name = now->val.str.startmb; + + /* Next should be a semicolon. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok != tok_semicolon) + goto translit_syntax; + + /* Now the repertoire name. */ + now = lr_token (ldfile, charmap, NULL); + if ((now->tok != tok_string && now->tok != tok_ident) + || now->val.str.startmb == NULL) + goto translit_syntax; + repertoire_name = now->val.str.startmb; + + /* We must not have more than one `include'. */ + if (ctype->translit_copy_locale != NULL) + { + lr_error (ldfile, _("\ +%s: only one `include' instruction allowed"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + continue; + } + + ctype->translit_copy_locale = locale_name; + ctype->translit_copy_repertoire = repertoire_name; + + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + continue; + } + + read_translit_entry (ldfile, ctype, now, charmap, repertoire); + } + break; + + case tok_ident: + /* This could mean one of several things. First test whether + it's a character class name. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0) + break; + if (cnt < ctype->nr_charclass) + { + class_bit = _ISwbit (cnt); + class256_bit = cnt <= 11 ? _ISbit (cnt) : 0; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "special1") == 0) + { + class_bit = _ISwspecial1; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "special2") == 0) + { + class_bit = _ISwspecial2; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "special3") == 0) + { + class_bit = _ISwspecial3; + free (now->val.str.startmb); + goto read_charclass; + } + if (strcmp (now->val.str.startmb, "tosymmetric") == 0) + { + mapidx = 2; + goto read_mapping; + } + break; + + case tok_end: + /* Next we assume `LC_CTYPE'. */ + now = lr_token (ldfile, charmap, NULL); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_CTYPE"); + else if (now->tok != tok_lc_ctype) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); + lr_ignore_rest (ldfile, now->tok == tok_lc_ctype); + return; + + default: + err_label: + if (now->tok != tok_eof) + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); } - (*table)[cnt] = 0; - *act = cnt; + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL); + nowtok = now->tok; } - return &(*table)[cnt]; + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); } static void -set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset) +set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap, + struct repertoire_t *repertoire) { + size_t cnt; + /* These function defines the default values for the classes and conversions according to POSIX.2 2.5.2.1. It may seem that the order of these if-blocks is arbitrary but it is NOT. Don't move them unless you know what you do! */ - void set_default (int bit, int from, int to) + void set_default (int bitpos, int from, int to) { char tmp[2]; int ch; + int bit = _ISbit (bitpos); + int bitw = _ISwbit (bitpos); /* Define string. */ strcpy (tmp, "?"); for (ch = from; ch <= to; ++ch) { - unsigned int value; + uint32_t value; + struct charseq *seq; tmp[0] = ch; - value = charset_find_value (&charset->char_table, tmp, 1); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, tmp, 1); + if (value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - tmp); - continue; +%s: character `%s' not defined in repertoire while needed as default value"), + "LC_CTYPE", tmp); + } + else + ELEM (ctype, class_collection, , value) |= bitw; + + seq = charmap_find_value (charmap, tmp, 1); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined in charmap while needed as default value"), + "LC_CTYPE", tmp); } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", tmp); else - ELEM (ctype, class_collection, , value) |= bit; + ctype->class256_collection[seq->bytes[0]] |= bit; } } /* Set default values if keyword was not present. */ - if ((ctype->class_done & BIT (tok_upper)) == 0) + if ((ctype->class_done & BITw (tok_upper)) == 0) /* "If this keyword [lower] is not specified, the lowercase letters `A' through `Z', ..., shall automatically belong to this class, with implementation defined character values." [P1003.2, 2.5.2.1] */ - set_default (BIT (tok_upper), 'A', 'Z'); + set_default (BITPOS (tok_upper), 'A', 'Z'); - if ((ctype->class_done & BIT (tok_lower)) == 0) + if ((ctype->class_done & BITw (tok_lower)) == 0) /* "If this keyword [lower] is not specified, the lowercase letters `a' through `z', ..., shall automatically belong to this class, with implementation defined character values." [P1003.2, 2.5.2.1] */ - set_default (BIT (tok_lower), 'a', 'z'); + set_default (BITPOS (tok_lower), 'a', 'z'); - if ((ctype->class_done & BIT (tok_alpha)) == 0) + if ((ctype->class_done & BITw (tok_alpha)) == 0) { /* Table 2-6 in P1003.2 says that characters in class `upper' or class `lower' *must* be in class `alpha'. */ unsigned long int mask = BIT (tok_upper) | BIT (tok_lower); - size_t cnt; for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_alpha); } - if ((ctype->class_done & BIT (tok_digit)) == 0) + if ((ctype->class_done & BITw (tok_digit)) == 0) /* "If this keyword [digit] is not specified, the digits `0' through `9', ..., shall automatically belong to this class, with implementation-defined character values." [P1003.2, 2.5.2.1] */ - set_default (BIT (tok_digit), '0', '9'); + set_default (BITPOS (tok_digit), '0', '9'); /* "Only characters specified for the `alpha' and `digit' keyword shall be specified. Characters specified for the keyword `alpha' and `digit' are automatically included in this class. */ { unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit); - size_t cnt; for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_alnum); } - if ((ctype->class_done & BIT (tok_space)) == 0) + if ((ctype->class_done & BITw (tok_space)) == 0) /* "If this keyword [space] is not specified, the characters , , , , , and , ..., shall automatically belong to this class, with implementation-defined character values." [P1003.2, 2.5.2.1] */ { - unsigned int value; + uint32_t value; + struct charseq *seq; - value = charset_find_value (&charset->char_table, "space", 5); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, "space", 5); + if (value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "form-feed", 9); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "form-feed", 9); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "newline", 7); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "form-feed", 9); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "newline", 7); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "carriage-return", 15); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "newline", 7); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ character `%s' not defined while needed as default value"), - ""); + ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "carriage-return", 15); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "tab", 3); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "carriage-return", 15); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "tab", 3); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); - value = charset_find_value (&charset->char_table, "vertical-tab", 12); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + + value = repertoire_find_value (repertoire, "vertical-tab", 12); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_space); + + seq = charmap_find_value (charmap, "vertical-tab", 12); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); } - if ((ctype->class_done & BIT (tok_xdigit)) == 0) + if ((ctype->class_done & BITw (tok_xdigit)) == 0) /* "If this keyword is not specified, the digits `0' to `9', the uppercase letters `A' through `F', and the lowercase letters `a' through `f', ..., shell automatically belong to this class, with implementation defined character values." [P1003.2, 2.5.2.1] */ { - set_default (BIT (tok_xdigit), '0', '9'); - set_default (BIT (tok_xdigit), 'A', 'F'); - set_default (BIT (tok_xdigit), 'a', 'f'); + set_default (BITPOS (tok_xdigit), '0', '9'); + set_default (BITPOS (tok_xdigit), 'A', 'F'); + set_default (BITPOS (tok_xdigit), 'a', 'f'); } - if ((ctype->class_done & BIT (tok_blank)) == 0) + if ((ctype->class_done & BITw (tok_blank)) == 0) /* "If this keyword [blank] is unspecified, the characters and shall belong to this character class." [P1003.2, 2.5.2.1] */ { - unsigned int value; + uint32_t value; + struct charseq *seq; - value = charset_find_value (&charset->char_table, "space", 5); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + value = repertoire_find_value (repertoire, "space", 5); + if (value == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_blank); - value = charset_find_value (&charset->char_table, "tab", 3); - if ((wchar_t) value == ILLEGAL_CHAR_VALUE) + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); + + + value = repertoire_find_value (repertoire, "tab", 3); + if (value == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , value) |= BIT (tok_blank); + + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); } - if ((ctype->class_done & BIT (tok_graph)) == 0) + if ((ctype->class_done & BITw (tok_graph)) == 0) /* "If this keyword [graph] is not specified, characters specified for the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct', shall belong to this character class." [P1003.2, 2.5.2.1] */ @@ -1142,9 +2603,13 @@ character `%s' not defined while needed as default value"), for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_graph); + + for (cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_graph); } - if ((ctype->class_done & BIT (tok_print)) == 0) + if ((ctype->class_done & BITw (tok_print)) == 0) /* "If this keyword [print] is not provided, characters specified for the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct', and the character shall belong to this character class." @@ -1153,25 +2618,46 @@ character `%s' not defined while needed as default value"), unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); size_t cnt; - wchar_t space; + uint32_t space; + struct charseq *seq; for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) if ((ctype->class_collection[cnt] & mask) != 0) ctype->class_collection[cnt] |= BIT (tok_print); - space = charset_find_value (&charset->char_table, "space", 5); + for (cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_print); + + + space = repertoire_find_value (repertoire, "space", 5); if (space == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - ""); +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); } else ELEM (ctype, class_collection, , space) |= BIT (tok_print); + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", ""); + } + else if (seq->nbytes != 1) + error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", ""); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print); } - if (ctype->toupper_done == 0) + if (ctype->tomap_done[0] == 0) /* "If this keyword [toupper] is not specified, the lowercase letters `a' through `z', and their corresponding uppercase letters `A' to `Z', ..., shall automatically be included, with implementation- @@ -1184,55 +2670,133 @@ character `%s' not defined while needed as default value"), for (ch = 'a'; ch <= 'z'; ++ch) { - unsigned int value_from, value_to; + uint32_t value_from, value_to; + struct charseq *seq_from, *seq_to; tmp[1] = (char) ch; - value_from = charset_find_value (&charset->char_table, &tmp[1], 1); - if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE) + value_from = repertoire_find_value (repertoire, &tmp[1], 1); + if (value_from == ILLEGAL_CHAR_VALUE) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - tmp); - continue; +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else + { + /* This conversion is implementation defined. */ + tmp[1] = (char) (ch + ('A' - 'a')); + value_to = repertoire_find_value (repertoire, &tmp[1], 1); + if (value_to == ILLEGAL_CHAR_VALUE) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ELEM (ctype, map_collection, [0], value_from) = value_to; } - /* This conversion is implementation defined. */ - tmp[1] = (char) (ch + ('A' - 'a')); - value_to = charset_find_value (&charset->char_table, &tmp[1], 1); - if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE) + seq_from = charmap_find_value (charmap, &tmp[1], 1); + if (seq_from == NULL) { if (!be_quiet) error (0, 0, _("\ -character `%s' not defined while needed as default value"), - tmp); - continue; +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else if (seq_from->nbytes != 1) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp); + } + else + { + /* This conversion is implementation defined. */ + tmp[1] = (char) (ch + ('A' - 'a')); + seq_to = charmap_find_value (charmap, &tmp[1], 1); + if (seq_to == NULL) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp); + } + else if (seq_to->nbytes != 1) + { + if (!be_quiet) + error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp); + } + else + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ctype->map256_collection[0][seq_from->bytes[0]] + = seq_to->bytes[0]; } - - /* The index [0] is determined by the order of the - `ctype_map_newP' calls in `ctype_startup'. */ - ELEM (ctype, map_collection, [0], value_from) = value_to; } } - if (ctype->tolower_done == 0) + if (ctype->tomap_done[1] == 0) /* "If this keyword [tolower] is not specified, the mapping shall be the reverse mapping of the one specified to `toupper'." [P1003.2] */ { - size_t cnt; - for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt) if (ctype->map_collection[0][cnt] != 0) ELEM (ctype, map_collection, [1], ctype->map_collection[0][cnt]) = ctype->charnames[cnt]; + + for (cnt = 0; cnt < 256; ++cnt) + if (ctype->map256_collection[0][cnt] != 0) + ctype->map_collection[1][ctype->map_collection[0][cnt]] + = ctype->charnames[cnt]; + } + + if (ctype->outdigits_act == 0) + { + for (cnt = 0; cnt < 10; ++cnt) + { + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + digits + cnt, 1); + + if (ctype->mboutdigits[cnt] == NULL) + { + ctype->mboutdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + + if (ctype->mboutdigits[cnt] == NULL) + { + /* Provide a replacement. */ + error (0, 0, _("\ +no output digits defined and none of the standard names in the charmap")); + + ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool, + sizeof (struct charseq) + 1); + + /* This is better than nothing. */ + ctype->mboutdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mboutdigits[cnt]->nbytes = 1; + } + } + } + + ctype->outdigits_act = 10; } } static void -allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset) +allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap, + struct repertoire_t *repertoire) { size_t idx; @@ -1300,12 +2864,12 @@ Computing table size for character classes might take a while..."), # define NAMES_B2 ctype->names_el #endif - ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size - * ctype->plane_cnt, - sizeof (u_int32_t)); - ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size - * ctype->plane_cnt, - sizeof (u_int32_t)); + ctype->names_eb = (uint32_t *) xcalloc (ctype->plane_size + * ctype->plane_cnt, + sizeof (uint32_t)); + ctype->names_el = (uint32_t *) xcalloc (ctype->plane_size + * ctype->plane_cnt, + sizeof (uint32_t)); for (idx = 1; idx < 256; ++idx) NAMES_B1[idx] = idx; @@ -1330,7 +2894,7 @@ Computing table size for character classes might take a while..."), NAMES_B1[0] = 0; for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx) - NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]); + NAMES_B2[idx] = bswap_32 (NAMES_B1[idx]); /* You wonder about this amount of memory? This is only because some @@ -1353,10 +2917,9 @@ Computing table size for character classes might take a while..."), # define TRANS32(w) (w) #endif - for (idx = 0; idx < ctype->class_collection_act; ++idx) - if (ctype->charnames[idx] < 256) - ctype->ctype_b[128 + ctype->charnames[idx]] - = TRANS (ctype->class_collection[idx]); + /* This is the array accessed usig the multibyte string elements. */ + for (idx = 0; idx < 256; ++idx) + ctype->ctype_b[128 + idx] = TRANS (ctype->class256_collection[idx]); /* Mirror first 127 entries. We must take care that entry -1 is not mirrored because EOF == -1. */ @@ -1369,10 +2932,10 @@ Computing table size for character classes might take a while..."), = TRANS32 (ctype->class_collection[idx]); /* Room for table of mappings. */ - ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr - * sizeof (u_int32_t *)); - ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr - * sizeof (u_int32_t *)); + ctype->map_eb = (uint32_t **) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t *)); + ctype->map_el = (uint32_t **) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t *)); /* Fill in all mappings. */ for (idx = 0; idx < ctype->map_collection_nr; ++idx) @@ -1380,12 +2943,12 @@ Computing table size for character classes might take a while..."), unsigned int idx2; /* Allocate table. */ - ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size - * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); - ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size - * ctype->plane_cnt + 128) - * sizeof (u_int32_t)); + ctype->map_eb[idx] = (uint32_t *) xmalloc ((ctype->plane_size + * ctype->plane_cnt + 128) + * sizeof (uint32_t)); + ctype->map_el[idx] = (uint32_t *) xmalloc ((ctype->plane_size + * ctype->plane_cnt + 128) + * sizeof (uint32_t)); #if __BYTE_ORDER == __LITTLE_ENDIAN # define MAP_B1 ctype->map_el @@ -1397,13 +2960,11 @@ Computing table size for character classes might take a while..."), /* Copy default value (identity mapping). */ memcpy (&MAP_B1[idx][128], NAMES_B1, - ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t)); + ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t)); /* Copy values from collection. */ - for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2) - if (ctype->map_collection[idx][idx2] != 0) - MAP_B1[idx][128 + ctype->charnames[idx2]] = - ctype->map_collection[idx][idx2]; + for (idx2 = 0; idx2 < 256; ++idx2) + MAP_B1[idx][128 + idx2] = ctype->map256_collection[idx][idx2]; /* Mirror first 127 entries. We must take care not to map entry -1 because EOF == -1. */ @@ -1415,14 +2976,14 @@ Computing table size for character classes might take a while..."), /* And now the other byte order. */ for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2) - MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]); + MAP_B2[idx][idx2] = bswap_32 (MAP_B1[idx][idx2]); } /* Extra array for class and map names. */ - ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass - * sizeof (u_int32_t)); - ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr - * sizeof (u_int32_t)); + ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass + * sizeof (uint32_t)); + ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr + * sizeof (uint32_t)); /* Array for width information. Because the expected width are very small we use only one single byte. This save space and we need @@ -1430,16 +2991,17 @@ Computing table size for character classes might take a while..."), ctype->width = (unsigned char *) xmalloc (ctype->plane_size * ctype->plane_cnt); /* Initialize with default width value. */ - memset (ctype->width, charset->width_default, + memset (ctype->width, charmap->width_default, ctype->plane_size * ctype->plane_cnt); - if (charset->width_rules != NULL) + if (charmap->width_rules != NULL) { +#if 0 size_t cnt; - for (cnt = 0; cnt < charset->nwidth_rules; ++cnt) - if (charset->width_rules[cnt].width != charset->width_default) - for (idx = charset->width_rules[cnt].from; - idx <= charset->width_rules[cnt].to; ++idx) + for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) + if (charmap->width_rules[cnt].width != charmap->width_default) + for (idx = charmap->width_rules[cnt].from; + idx <= charmap->width_rules[cnt].to; ++idx) { size_t nr = idx % ctype->plane_size; size_t depth = 0; @@ -1449,15 +3011,229 @@ Computing table size for character classes might take a while..."), assert (depth < ctype->plane_cnt); ctype->width[nr + depth * ctype->plane_size] - = charset->width_rules[cnt].width; + = charmap->width_rules[cnt].width; } +#else + abort (); +#endif } - /* Compute MB_CUR_MAX. */ - ctype->mb_cur_max = charset->mb_cur_max; + /* Set MB_CUR_MAX. */ + ctype->mb_cur_max = charmap->mb_cur_max; /* We need the name of the currently used 8-bit character set to make correct conversion between this 8-bit representation and the ISO 10646 character set used internally for wide characters. */ - ctype->codeset_name = charset->code_set_name ? : ""; + ctype->codeset_name = charmap->code_set_name; + + /* Now determine the table for the transliteration information. + + XXX It is not yet clear to me whether it is worth implementing a + complicated algorithm which uses a hash table to locate the entries. + For now I'll use a simple array which can be searching using binary + search. */ + if (ctype->translit_copy_locale != NULL) + { + /* Fold in the transliteration information from the locale mentioned + in the `include' statement. */ + struct locale_ctype_t *here = ctype; + + do + { + struct localedef_t *other = find_locale (LC_CTYPE, + here->translit_copy_locale, + repertoire->name, charmap); + + if (other == NULL) + { + error (0, 0, _("\ +%s: transliteration data from locale `%s' not available"), + "LC_CTYPE", here->translit_copy_locale); + break; + } + + here = other->categories[LC_CTYPE].ctype; + + /* Enqueue the information if necessary. */ + if (here->translit != NULL) + { + struct translit_t *endp = here->translit; + while (endp->next != NULL) + endp = endp->next; + + endp->next = ctype->translit; + ctype->translit = here->translit; + } + } + while (here->translit_copy_locale != NULL); + } + + if (ctype->translit != NULL) + { + /* First count how many entries we have. This is the upper limit + since some entries from the included files might be overwritten. */ + size_t number = 0; + size_t cnt; + struct translit_t *runp = ctype->translit; + struct translit_t **sorted; + size_t from_len, to_len; + + while (runp != NULL) + { + ++number; + runp = runp->next; + } + + /* Next we allocate an array large enough and fill in the values. */ + sorted = alloca (number * sizeof (struct translit_t **)); + runp = ctype->translit; + number = 0; + do + { + /* Search for the place where to insert this string. + XXX Better use a real sorting algorithm later. */ + size_t idx = 0; + int replace = 0; + + while (idx < number) + { + int res = wcscmp ((const wchar_t *) sorted[idx]->from, + (const wchar_t *) runp->from); + if (res == 0) + { + replace = 1; + break; + } + if (res > 0) + break; + ++idx; + } + + if (replace) + sorted[idx] = runp; + else + { + memmove (&sorted[idx + 1], &sorted[idx], + (number - idx) * sizeof (struct translit_t *)); + sorted[idx] = runp; + ++number; + } + + runp = runp->next; + } + while (runp != NULL); + + /* The next step is putting all the possible transliteration + strings in one memory block so that we can write it out. + We need several different blocks: + - index to the tfromstring array + - from-string array + - index to the to-string array + - to-string array. + And this all must be available for both endianes variants. + */ + from_len = to_len = 0; + for (cnt = 0; cnt < number; ++cnt) + { + struct translit_to_t *srunp; + from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1; + srunp = sorted[cnt]->to; + while (srunp != NULL) + { + to_len += wcslen ((const wchar_t *) srunp->str) + 1; + srunp = srunp->next; + } + /* Plus one for the extra NUL character marking the end of + the list for the current entry. */ + ++to_len; + } + + /* We can allocate the arrays for the results. */ +#if BYTE_ORDER == LITTLE_ENDIAN +# define from_idx translit_from_idx_el +# define from_tbl translit_from_tbl_el +# define to_idx translit_to_idx_el +# define to_tbl translit_to_tbl_el +# define from_idx_ob translit_from_idx_eb +# define from_tbl_ob translit_from_tbl_eb +# define to_idx_ob translit_to_idx_eb +# define to_tbl_ob translit_to_tbl_eb +#else +# define from_idx translit_from_idx_eb +# define from_tbl translit_from_tbl_eb +# define to_idx translit_to_idx_eb +# define to_tbl translit_to_tbl_eb +# define from_idx_ob translit_from_idx_el +# define from_tbl_ob translit_from_tbl_el +# define to_idx_ob translit_to_idx_el +# define to_tbl_ob translit_to_tbl_el +#endif + ctype->from_idx = xmalloc (number * sizeof (uint32_t)); + ctype->from_idx_ob = xmalloc (number * sizeof (uint32_t)); + ctype->from_tbl = xmalloc (from_len * sizeof (uint32_t)); + ctype->from_tbl_ob = xmalloc (from_len * sizeof (uint32_t)); + ctype->to_idx = xmalloc (number * sizeof (uint32_t)); + ctype->to_idx_ob = xmalloc (number * sizeof (uint32_t)); + ctype->to_tbl = xmalloc (to_len * sizeof (uint32_t)); + ctype->to_tbl_ob = xmalloc (to_len * sizeof (uint32_t)); + + from_len = 0; + to_len = 0; + for (cnt = 0; cnt < number; ++cnt) + { + size_t len; + struct translit_to_t *srunp; + + ctype->from_idx[cnt] = from_len; + ctype->to_idx[cnt] = to_len; + + len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1; + wmemcpy ((wchar_t *) &ctype->from_tbl[from_len], + (const wchar_t *) sorted[cnt]->from, len); + from_len += len; + + ctype->to_idx[cnt] = to_len; + srunp = sorted[cnt]->to; + while (srunp != NULL) + { + len = wcslen ((const wchar_t *) srunp->str) + 1; + wmemcpy ((wchar_t *) &ctype->to_tbl[to_len], + (const wchar_t *) srunp->str, len); + to_len += len; + srunp = srunp->next; + } + ctype->to_tbl[to_len++] = L'\0'; + } + + /* Now create the tables for the other endianess. */ + for (cnt = 0; cnt < number; ++cnt) + { + ctype->from_idx_ob[cnt] = bswap_32 (ctype->from_idx[cnt]); + ctype->to_idx_ob[cnt] = bswap_32 (ctype->to_idx[cnt]); + } + for (cnt = 0; cnt < from_len; ++cnt) + ctype->from_tbl[cnt] = bswap_32 (ctype->from_tbl_ob[cnt]); + for (cnt = 0; cnt < to_len; ++cnt) + ctype->to_tbl[cnt] = bswap_32 (ctype->to_tbl_ob[cnt]); + + /* Store the information about the length. */ + ctype->translit_idx_size = number * sizeof (uint32_t); + ctype->translit_from_tbl_size = from_len * sizeof (uint32_t); + ctype->translit_to_tbl_size = to_len * sizeof (uint32_t); + } + else + { + /* Provide some dummy pointers since we have nothing to write out. */ + static uint32_t no_str = { 0 }; + + ctype->translit_from_idx_el = &no_str; + ctype->translit_from_idx_eb = &no_str; + ctype->translit_from_tbl_el = &no_str; + ctype->translit_from_tbl_eb = &no_str; + ctype->translit_to_tbl_el = &no_str; + ctype->translit_to_tbl_eb = &no_str; + ctype->translit_idx_size = 0; + ctype->translit_from_tbl_size = 0; + ctype->translit_to_tbl_size = 0; + } } -- cgit v1.2.3