about | summary | refs | log | tree | commit | diff
path: root/locale/programs/ld-ctype.c
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com>, 1999-08-31 07:04:41 +0000
committer: Ulrich Drepper <drepper@redhat.com>, 1999-08-31 07:04:41 +0000
commit: 4b10dd6c1959577f57850ca427a94fe22b9f3299 (patch)
tree: b385d9b27e5a40d5baf7cd7e27c7cc5ef7129b5b /locale/programs/ld-ctype.c
parent: 1d1740d6b12894ed6a430e2e98bf73c5243b2925 (diff)
download: glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar
glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar.gz
glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar.bz2
glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.zip
Update.
* locale/Makefile (distribute): Add iso-639.def and iso-3166.def. Change charset.h to charmap.h. (categories): Add new categories. Leave out collate for now. Update build rules. * locale/categories.def: Add definitions for new categories. * locale/langinfo.h: Likewise. * locale/locale.h: Likewise. * locale/C-address.c: New file. * locale/C-identification.c: New file. * locale/C-measurement.c: New file. * locale/C-name.c: New file. * locale/C-paper.c: New file. * locale/C-telephone.c: New file. * locale/lc-address.c: Likewise. * locale/lc-identification.c: Likewise. * locale/lc-measurement.c: Likewise. * locale/lc-name.c: Likewise. * locale/lc-paper.c: Likewise. * locale/lc-telephone.c: Likewise. * locale/C-ctype.c: Update for locale rewrite. * locale/C-messages.c: Likewise. * locale/C-monetary.c: Likewise. * locale/C-time.c: Likewise. * locale/lc-collate.c: Likewise. * locale/lc-ctype.c: Likewise. * locale/lc-monetary.c: Likewise. * locale/lc-time.c: Likewise. * locale/localeinfo.h: Likewise. * locale/newlocale.c: Likewise. * locale/setlocale.c: Likewise. * locale/weight.h: Likewise. * locale/findlocale.c: Unconditionally use mmap. Handle new categories. * locale/loadlocale.c: Likewise. * locale/iso-3166.def: New file. * locale/iso-639.def: New file. * locale/programs/charmap-kw.gperf: Add new keywords. * locale/programs/locfile-kw.gperf: Likewise. * locale/programs/locfile-token.h: Define new tokens. * locale/programs/charmap.c: Rewrite to handle multibyte charsets. * locale/programs/charmap.h: New file. * locale/programs/charset.h: Removed. * locale/programs/config.h: Add __LC_LAST. * locale/programs/lc-address.c: New file. * locale/programs/lc-identification.c: New file. * locale/programs/lc-measurement.c: New file. * locale/programs/lc-name.c: New file. * locale/programs/lc-paper.c: New file. * locale/programs/lc-telephone.c: New file. * locale/programs/lc-collate.c: Update for locale rewrite. * locale/programs/lc-ctype.c: Likewise. 
* locale/programs/lc-messages.c: Likewise. * locale/programs/lc-monetary.c: Likewise. * locale/programs/lc-numeric.c: Likewise. * locale/programs/lc-time.c: Likewise. * locale/programs/locale.c: Likewise. * locale/programs/localedef.c: Likewise. * locale/programs/locfile.c: Likewise. * locale/programs/repertoire.c: Likewise. * locale/programs/repertoire.h: Likewise. * locale/programs/locfile.c: Update prototypes. Update handle_copy definition. * locale/programs/linereader.c: Add handling of wide char strings and new definition file syntax. * locale/programs/linereader.h (struct token): Add elements for wide character strings. * locale/programs/locale-spec.c: Disable handling of collation elements for now. * locale/programs/simple-hash.h: Cleanup. * locale/programs/stringtrans.h: Handle quoting of end of line. * string/strcoll.c: Fall back on strcmp for now. * string/strxfrm.c: Fall back on strncpy/strlen for now. * time/strftime.c: Use new wide character data for wcsftime. * time/strptime.c: Remove _nl_C_LC_TIME declaration. * wctype/cname-lookup.h: Update for new LC_CTYPE data.
Diffstat (limited to 'locale/programs/ld-ctype.c')
-rw-r--r-- locale/programs/ld-ctype.c 3042
1 file changed, 2409 insertions, 633 deletions
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index 714a71898b..6743c1837c 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -22,183 +22,274 @@
#endif
#include <alloca.h>
+#include <byteswap.h>
#include <endian.h>
+#include <errno.h>
#include <limits.h>
+#include <obstack.h>
+#include <stdlib.h>
#include <string.h>
-#include <libintl.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <sys/uio.h>
-#include "locales.h"
+#include "charmap.h"
#include "localeinfo.h"
#include "langinfo.h"
+#include "linereader.h"
#include "locfile-token.h"
-#include "stringtrans.h"
+#include "locfile.h"
+#include "localedef.h"
-/* Uncomment the following line in the production version. */
-/* define NDEBUG 1 */
#include <assert.h>
-void *xmalloc (size_t __n);
-void *xcalloc (size_t __n, size_t __s);
-void *xrealloc (void *__ptr, size_t __n);
+/* These are the extra bits not in wctype.h since these are not preallocated
+ classes. */
+#define _ISwspecial1 (1 << 29)
+#define _ISwspecial2 (1 << 30)
+#define _ISwspecial3 (1 << 31)
/* The bit used for representing a special class. */
#define BITPOS(class) ((class) - tok_upper)
-#define BIT(class) (1 << BITPOS (class))
+#define BIT(class) (_ISbit (BITPOS (class)))
+#define BITw(class) (_ISwbit (BITPOS (class)))
#define ELEM(ctype, collection, idx, value) \
*find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
&ctype->collection##_act idx, value)
-#define SWAPU32(w) \
- (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
-
-#define SWAPU16(w) \
- ((((w) >> 8) & 0xff) | (((w) & 0xff) << 8))
-
/* To be compatible with former implementations we for now restrict
the number of bits for character classes to 16. When compatibility
is not necessary anymore increase the number to 32. */
-#define char_class_t u_int16_t
-#define CHAR_CLASS_TRANS SWAPU16
-#define char_class32_t u_int32_t
-#define CHAR_CLASS32_TRANS SWAPU32
+#define char_class_t uint16_t
+#define CHAR_CLASS_TRANS bswap_16
+#define char_class32_t uint32_t
+#define CHAR_CLASS32_TRANS bswap_32
+
+
+/* Type to describe a transliteration action. We have a possibly
+ multiple character from-string and a set of multiple character
+ to-strings. All are 32bit values since this is what is used in
+ the gconv functions. */
+struct translit_to_t
+{
+ uint32_t *str;
+
+ struct translit_to_t *next;
+};
+
+struct translit_t
+{
+ uint32_t *from;
+
+ struct translit_to_t *to;
+
+ struct translit_t *next;
+};
/* The real definition of the struct for the LC_CTYPE locale. */
struct locale_ctype_t
{
- unsigned int *charnames;
+ uint32_t *charnames;
size_t charnames_max;
size_t charnames_act;
- /* We will allow up to 8 * sizeof(u_int32_t) - 1 character classes. */
-#define MAX_NR_CHARCLASS (8 * sizeof (u_int32_t) - 1)
+ struct repertoire_t *repertoire;
+
+ /* We will allow up to 8 * sizeof (uint32_t) character classes. */
+#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
size_t nr_charclass;
const char *classnames[MAX_NR_CHARCLASS];
- unsigned long int current_class_mask;
- unsigned int last_class_char;
- u_int32_t *class_collection;
+ uint32_t last_class_char;
+ uint32_t class256_collection[256];
+ uint32_t *class_collection;
size_t class_collection_max;
size_t class_collection_act;
- unsigned long int class_done;
+ uint32_t class_done;
+
+ struct charseq **mbdigits;
+ size_t mbdigits_act;
+ size_t mbdigits_max;
+ uint32_t *wcdigits;
+ size_t wcdigits_act;
+ size_t wcdigits_max;
+
+ struct charseq *mboutdigits[10];
+ uint32_t wcoutdigits[10];
+ size_t outdigits_act;
/* If the following number ever turns out to be too small simply
increase it. But I doubt it will. --drepper@gnu */
#define MAX_NR_CHARMAP 16
const char *mapnames[MAX_NR_CHARMAP];
- u_int32_t *map_collection[MAX_NR_CHARMAP];
+ uint32_t *map_collection[MAX_NR_CHARMAP];
+ uint32_t map256_collection[2][256];
size_t map_collection_max[MAX_NR_CHARMAP];
size_t map_collection_act[MAX_NR_CHARMAP];
size_t map_collection_nr;
size_t last_map_idx;
- unsigned int from_map_char;
- int toupper_done;
- int tolower_done;
+ int tomap_done[MAX_NR_CHARMAP];
+
+ /* Transliteration information. */
+ const char *translit_copy_locale;
+ const char *translit_copy_repertoire;
+ struct translit_t *translit;
/* The arrays for the binary representation. */
- u_int32_t plane_size;
- u_int32_t plane_cnt;
+ uint32_t plane_size;
+ uint32_t plane_cnt;
char_class_t *ctype_b;
char_class32_t *ctype32_b;
- u_int32_t *names_el;
- u_int32_t *names_eb;
- u_int32_t **map_eb;
- u_int32_t **map_el;
- u_int32_t *class_name_ptr;
- u_int32_t *map_name_ptr;
+ uint32_t *names_el;
+ uint32_t *names_eb;
+ uint32_t **map_eb;
+ uint32_t **map_el;
+ uint32_t *class_name_ptr;
+ uint32_t *map_name_ptr;
unsigned char *width;
- u_int32_t mb_cur_max;
+ uint32_t mb_cur_max;
const char *codeset_name;
+ uint32_t translit_hash_size_eb;
+ uint32_t translit_hash_size_el;
+ uint32_t translit_hash_layers_eb;
+ uint32_t translit_hash_layers_el;
+ uint32_t *translit_from_idx_eb;
+ uint32_t *translit_from_idx_el;
+ uint32_t *translit_from_tbl_eb;
+ uint32_t *translit_from_tbl_el;
+ uint32_t *translit_to_idx_eb;
+ uint32_t *translit_to_idx_el;
+ uint32_t *translit_to_tbl_eb;
+ uint32_t *translit_to_tbl_el;
+ size_t translit_idx_size;
+ size_t translit_from_tbl_size;
+ size_t translit_to_tbl_size;
+
+ struct obstack mem_pool;
};
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+
/* Prototypes for local functions. */
-static void ctype_class_newP (struct linereader *lr,
- struct locale_ctype_t *ctype, const char *name);
-static void ctype_map_newP (struct linereader *lr,
- struct locale_ctype_t *ctype,
- const char *name, struct charset_t *charset);
-static u_int32_t *find_idx (struct locale_ctype_t *ctype, u_int32_t **table,
- size_t *max, size_t *act, unsigned int idx);
+static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
+ struct charmap_t *charmap, int ignore_content);
+static void ctype_class_new (struct linereader *lr,
+ struct locale_ctype_t *ctype, const char *name);
+static void ctype_map_new (struct linereader *lr,
+ struct locale_ctype_t *ctype,
+ const char *name, struct charmap_t *charmap);
+static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
+ size_t *max, size_t *act, unsigned int idx);
static void set_class_defaults (struct locale_ctype_t *ctype,
- struct charset_t *charset);
+ struct charmap_t *charmap,
+ struct repertoire_t *repertoire);
static void allocate_arrays (struct locale_ctype_t *ctype,
- struct charset_t *charset);
+ struct charmap_t *charmap,
+ struct repertoire_t *repertoire);
-void
+static const char *longnames[] =
+{
+ "zero", "one", "two", "three", "four",
+ "five", "six", "seven", "eight", "nine"
+};
+static const unsigned char digits[] = "0123456789";
+
+
+static void
ctype_startup (struct linereader *lr, struct localedef_t *locale,
- struct charset_t *charset)
+ struct charmap_t *charmap, int ignore_content)
{
unsigned int cnt;
struct locale_ctype_t *ctype;
- /* We have a definition for LC_CTYPE. */
- copy_posix.mask &= ~(1 << LC_CTYPE);
-
- /* It is important that we always use UCS1 encoding for strings now. */
- encoding_method = ENC_UCS1;
-
- /* Allocate the needed room. */
- locale->categories[LC_CTYPE].ctype = ctype =
- (struct locale_ctype_t *) xmalloc (sizeof (struct locale_ctype_t));
-
- /* We have no names seen yet. */
- ctype->charnames_max = charset->mb_cur_max == 1 ? 256 : 512;
- ctype->charnames =
- (unsigned int *) xmalloc (ctype->charnames_max * sizeof (unsigned int));
- for (cnt = 0; cnt < 256; ++cnt)
- ctype->charnames[cnt] = cnt;
- ctype->charnames_act = 256;
-
- /* Fill character class information. */
- ctype->nr_charclass = 0;
- ctype->current_class_mask = 0;
- ctype->last_class_char = ILLEGAL_CHAR_VALUE;
- /* The order of the following instructions determines the bit
- positions! */
- ctype_class_newP (lr, ctype, "upper");
- ctype_class_newP (lr, ctype, "lower");
- ctype_class_newP (lr, ctype, "alpha");
- ctype_class_newP (lr, ctype, "digit");
- ctype_class_newP (lr, ctype, "xdigit");
- ctype_class_newP (lr, ctype, "space");
- ctype_class_newP (lr, ctype, "print");
- ctype_class_newP (lr, ctype, "graph");
- ctype_class_newP (lr, ctype, "blank");
- ctype_class_newP (lr, ctype, "cntrl");
- ctype_class_newP (lr, ctype, "punct");
- ctype_class_newP (lr, ctype, "alnum");
-
- ctype->class_collection_max = charset->mb_cur_max == 1 ? 256 : 512;
- ctype->class_collection
- = (u_int32_t *) xmalloc (sizeof (unsigned long int)
- * ctype->class_collection_max);
- memset (ctype->class_collection, '\0',
- sizeof (unsigned long int) * ctype->class_collection_max);
- ctype->class_collection_act = 256;
-
- /* Fill character map information. */
- ctype->map_collection_nr = 0;
- ctype->last_map_idx = MAX_NR_CHARMAP;
- ctype->from_map_char = ILLEGAL_CHAR_VALUE;
- ctype_map_newP (lr, ctype, "toupper", charset);
- ctype_map_newP (lr, ctype, "tolower", charset);
-
- /* Fill first 256 entries in `toupper' and `tolower' arrays. */
- for (cnt = 0; cnt < 256; ++cnt)
+ if (!ignore_content)
{
- ctype->map_collection[0][cnt] = cnt;
- ctype->map_collection[1][cnt] = cnt;
+ /* Allocate the needed room. */
+ locale->categories[LC_CTYPE].ctype = ctype =
+ (struct locale_ctype_t *) xcalloc (1, sizeof (struct locale_ctype_t));
+
+ /* We have seen no names yet. */
+ ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
+ ctype->charnames =
+ (unsigned int *) xmalloc (ctype->charnames_max
+ * sizeof (unsigned int));
+ for (cnt = 0; cnt < 256; ++cnt)
+ ctype->charnames[cnt] = cnt;
+ ctype->charnames_act = 256;
+
+ /* Fill character class information. */
+ ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+ /* The order of the following instructions determines the bit
+ positions! */
+ ctype_class_new (lr, ctype, "upper");
+ ctype_class_new (lr, ctype, "lower");
+ ctype_class_new (lr, ctype, "alpha");
+ ctype_class_new (lr, ctype, "digit");
+ ctype_class_new (lr, ctype, "xdigit");
+ ctype_class_new (lr, ctype, "space");
+ ctype_class_new (lr, ctype, "print");
+ ctype_class_new (lr, ctype, "graph");
+ ctype_class_new (lr, ctype, "blank");
+ ctype_class_new (lr, ctype, "cntrl");
+ ctype_class_new (lr, ctype, "punct");
+ ctype_class_new (lr, ctype, "alnum");
+ /* The following are extensions from ISO 14652. */
+ ctype_class_new (lr, ctype, "left_to_right");
+ ctype_class_new (lr, ctype, "right_to_left");
+ ctype_class_new (lr, ctype, "num_terminator");
+ ctype_class_new (lr, ctype, "num_separator");
+ ctype_class_new (lr, ctype, "segment_separator");
+ ctype_class_new (lr, ctype, "block_separator");
+ ctype_class_new (lr, ctype, "direction_control");
+ ctype_class_new (lr, ctype, "sym_swap_layout");
+ ctype_class_new (lr, ctype, "char_shape_selector");
+ ctype_class_new (lr, ctype, "num_shape_selector");
+ ctype_class_new (lr, ctype, "non_spacing");
+ ctype_class_new (lr, ctype, "non_spacing_level3");
+ ctype_class_new (lr, ctype, "normal_connect");
+ ctype_class_new (lr, ctype, "r_connect");
+ ctype_class_new (lr, ctype, "no_connect");
+ ctype_class_new (lr, ctype, "no_connect-space");
+ ctype_class_new (lr, ctype, "vowel_connect");
+
+ ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
+ ctype->class_collection
+ = (uint32_t *) xcalloc (sizeof (unsigned long int),
+ ctype->class_collection_max);
+ ctype->class_collection_act = 256;
+
+ /* Fill character map information. */
+ ctype->map_collection_nr = 0;
+ ctype->last_map_idx = MAX_NR_CHARMAP;
+ ctype_map_new (lr, ctype, "toupper", charmap);
+ ctype_map_new (lr, ctype, "tolower", charmap);
+ ctype_map_new (lr, ctype, "tosymmetric", charmap);
+
+ /* Fill first 256 entries in `toXXX' arrays. */
+ for (cnt = 0; cnt < 256; ++cnt)
+ {
+ ctype->map_collection[0][cnt] = cnt;
+ ctype->map_collection[1][cnt] = cnt;
+ ctype->map_collection[2][cnt] = cnt;
+ ctype->map256_collection[0][cnt] = cnt;
+ ctype->map256_collection[1][cnt] = cnt;
+ }
+
+ obstack_init (&ctype->mem_pool);
}
}
void
-ctype_finish (struct localedef_t *locale, struct charset_t *charset)
+ctype_finish (struct localedef_t *locale, struct charmap_t *charmap)
{
/* See POSIX.2, table 2-6 for the meaning of the following table. */
#define NCLASS 12
@@ -226,106 +317,138 @@ ctype_finish (struct localedef_t *locale, struct charset_t *charset)
};
size_t cnt;
int cls1, cls2;
- unsigned int space_value;
+ uint32_t space_value;
+ struct charseq *space_seq;
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+ int warned;
/* Set default value for classes not specified. */
- set_class_defaults (ctype, charset);
+ set_class_defaults (ctype, charmap, ctype->repertoire);
/* Check according to table. */
for (cnt = 0; cnt < ctype->class_collection_max; ++cnt)
{
- unsigned long int tmp;
+ uint32_t tmp = ctype->class_collection[cnt];
- tmp = ctype->class_collection[cnt];
- if (tmp == 0)
- continue;
-
- for (cls1 = 0; cls1 < NCLASS; ++cls1)
- if ((tmp & (1 << cls1)) != 0)
- for (cls2 = 0; cls2 < NCLASS; ++cls2)
- if (valid_table[cls1].allow[cls2] != '-')
- {
- int eq = (tmp & (1 << cls2)) != 0;
- switch (valid_table[cls1].allow[cls2])
+ if (tmp != 0)
+ {
+ for (cls1 = 0; cls1 < NCLASS; ++cls1)
+ if ((tmp & _ISwbit (cls1)) != 0)
+ for (cls2 = 0; cls2 < NCLASS; ++cls2)
+ if (valid_table[cls1].allow[cls2] != '-')
{
- case 'M':
- if (!eq)
+ int eq = (tmp & _ISwbit (cls2)) != 0;
+ switch (valid_table[cls1].allow[cls2])
{
- char buf[17];
- char *cp = buf;
- unsigned int value;
-
- value = ctype->charnames[cnt];
-
- if ((value & 0xff000000) != 0)
- cp += sprintf (cp, "\\%o", (value >> 24) & 0xff);
- if ((value & 0xffff0000) != 0)
- cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
- if ((value & 0xffffff00) != 0)
- cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
- sprintf (cp, "\\%o", value & 0xff);
-
- if (!be_quiet)
- error (0, 0, _("\
-character %s'%s' in class `%s' must be in class `%s'"), value > 256 ? "L" : "",
- buf, valid_table[cls1].name,
- valid_table[cls2].name);
+ case 'M':
+ if (!eq)
+ {
+ uint32_t value = ctype->charnames[cnt];
+
+ if (!be_quiet)
+ error (0, 0, _("\
+character L'\\u%0*x' in class `%s' must be in class `%s'"),
+ value > 0xffff ? 8 : 4, value,
+ valid_table[cls1].name,
+ valid_table[cls2].name);
+ }
+ break;
+
+ case 'X':
+ if (eq)
+ {
+ uint32_t value = ctype->charnames[cnt];
+
+ if (!be_quiet)
+ error (0, 0, _("\
+character L'\\u%0*x' in class `%s' must not be in class `%s'"),
+ value > 0xffff ? 8 : 4, value,
+ valid_table[cls1].name,
+ valid_table[cls2].name);
+ }
+ break;
+
+ case 'D':
+ ctype->class_collection[cnt] |= _ISwbit (cls2);
+ break;
+
+ default:
+ error (5, 0, _("internal error in %s, line %u"),
+ __FUNCTION__, __LINE__);
}
- break;
+ }
+ }
+ }
+
+ for (cnt = 0; cnt < 256; ++cnt)
+ {
+ uint32_t tmp = ctype->class256_collection[cnt];
- case 'X':
- if (eq)
+ if (tmp != 0)
+ {
+ for (cls1 = 0; cls1 < NCLASS; ++cls1)
+ if ((tmp & _ISbit (cls1)) != 0)
+ for (cls2 = 0; cls2 < NCLASS; ++cls2)
+ if (valid_table[cls1].allow[cls2] != '-')
+ {
+ int eq = (tmp & _ISbit (cls2)) != 0;
+ switch (valid_table[cls1].allow[cls2])
{
- char buf[17];
- char *cp = buf;
- unsigned int value;
-
- value = ctype->charnames[cnt];
-
- if ((value & 0xff000000) != 0)
- cp += sprintf (cp, "\\%o", value >> 24);
- if ((value & 0xffff0000) != 0)
- cp += sprintf (cp, "\\%o", (value >> 16) & 0xff);
- if ((value & 0xffffff00) != 0)
- cp += sprintf (cp, "\\%o", (value >> 8) & 0xff);
- sprintf (cp, "\\%o", value & 0xff);
-
- if (!be_quiet)
- error (0, 0, _("\
-character %s'%s' in class `%s' must not be in class `%s'"),
- value > 256 ? "L" : "", buf,
- valid_table[cls1].name,
- valid_table[cls2].name);
+ case 'M':
+ if (!eq)
+ {
+ char buf[17];
+
+ sprintf (buf, "\\%o", cnt);
+
+ if (!be_quiet)
+ error (0, 0, _("\
+character '%s' in class `%s' must be in class `%s'"),
+ buf, valid_table[cls1].name,
+ valid_table[cls2].name);
+ }
+ break;
+
+ case 'X':
+ if (eq)
+ {
+ char buf[17];
+
+ sprintf (buf, "\\%o", cnt);
+
+ if (!be_quiet)
+ error (0, 0, _("\
+character '%s' in class `%s' must not be in class `%s'"),
+ buf, valid_table[cls1].name,
+ valid_table[cls2].name);
+ }
+ break;
+
+ case 'D':
+ ctype->class256_collection[cnt] |= _ISbit (cls2);
+ break;
+
+ default:
+ error (5, 0, _("internal error in %s, line %u"),
+ __FUNCTION__, __LINE__);
}
- break;
-
- case 'D':
- ctype->class_collection[cnt] |= 1 << cls2;
- break;
-
- default:
- error (5, 0, _("internal error in %s, line %u"),
- __FUNCTION__, __LINE__);
- }
- }
+ }
+ }
}
/* ... and now test <SP> as a special case. */
- space_value = charset_find_value (&charset->char_table, "SP", 2);
- if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
- space_value = charset_find_value (&charset->char_table, "space", 5);
- if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
+ space_value = repertoire_find_value (ctype->repertoire, "SP", 2);
+ if (space_value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
error (0, 0, _("character <SP> not defined in character map"));
}
else if (((cnt = BITPOS (tok_space),
(ELEM (ctype, class_collection, , space_value)
- & BIT (tok_space)) == 0)
+ & BITw (tok_space)) == 0)
|| (cnt = BITPOS (tok_blank),
(ELEM (ctype, class_collection, , space_value)
- & BIT (tok_blank)) == 0)))
+ & BITw (tok_blank)) == 0)))
{
if (!be_quiet)
error (0, 0, _("<SP> character not in class `%s'"),
@@ -333,10 +456,10 @@ character %s'%s' in class `%s' must not be in class `%s'"),
}
else if (((cnt = BITPOS (tok_punct),
(ELEM (ctype, class_collection, , space_value)
- & BIT (tok_punct)) != 0)
+ & BITw (tok_punct)) != 0)
|| (cnt = BITPOS (tok_graph),
(ELEM (ctype, class_collection, , space_value)
- & BIT (tok_graph))
+ & BITw (tok_graph))
!= 0)))
{
if (!be_quiet)
@@ -344,24 +467,205 @@ character %s'%s' in class `%s' must not be in class `%s'"),
valid_table[cnt].name);
}
else
- ELEM (ctype, class_collection, , space_value) |= BIT (tok_print);
+ ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
+
+ space_seq = charmap_find_value (charmap, "SP", 2);
+ if (space_seq == NULL || space_seq->nbytes != 1)
+ {
+ if (!be_quiet)
+ error (0, 0, _("character <SP> not defined in character map"));
+ }
+ else if (((cnt = BITPOS (tok_space),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_space)) == 0)
+ || (cnt = BITPOS (tok_blank),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_blank)) == 0)))
+ {
+ if (!be_quiet)
+ error (0, 0, _("<SP> character not in class `%s'"),
+ valid_table[cnt].name);
+ }
+ else if (((cnt = BITPOS (tok_punct),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_punct)) != 0)
+ || (cnt = BITPOS (tok_graph),
+ (ctype->class256_collection[space_seq->bytes[0]]
+ & BIT (tok_graph)) != 0)))
+ {
+ if (!be_quiet)
+ error (0, 0, _("<SP> character must not be in class `%s'"),
+ valid_table[cnt].name);
+ }
+ else
+ ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
/* Now that the tests are done make sure the name array contains all
characters which are handled in the WIDTH section of the
character set definition file. */
- if (charset->width_rules != NULL)
- for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
+ if (charmap->width_rules != NULL)
+ for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
{
+#if 0
size_t inner;
- for (inner = charset->width_rules[cnt].from;
- inner <= charset->width_rules[cnt].to; ++inner)
+ for (inner = charmap->width_rules[cnt].from;
+ inner <= charmap->width_rules[cnt].to; ++inner)
(void) find_idx (ctype, NULL, NULL, NULL, inner);
+#else
+ /* XXX Handle width. We must convert from the charseq to the
+ repertoire value */
+ abort ();
+#endif
+ }
+
+ /* There must be a multiple of 10 digits. */
+ if (ctype->mbdigits_act % 10 != 0)
+ {
+ assert (ctype->mbdigits_act == ctype->wcdigits_act);
+ ctype->wcdigits_act -= ctype->mbdigits_act % 10;
+ ctype->mbdigits_act -= ctype->mbdigits_act % 10;
+ error (0, 0, _("`digit' category has not entries in groups of ten"));
+ }
+
+ /* Check the input digits. There must be a multiple of ten available.
+ In each group I could be that one or the other character is missing.
+ In this case the whole group must be removed. */
+ cnt = 0;
+ while (cnt < ctype->mbdigits_act)
+ {
+ size_t inner;
+ for (inner = 0; inner < 10; ++inner)
+ if (ctype->mbdigits[cnt + inner] == NULL)
+ break;
+
+ if (inner == 10)
+ cnt += 10;
+ else
+ {
+ /* Remove the group. */
+ memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
+ ((ctype->wcdigits_act - cnt - 10)
+ * sizeof (ctype->mbdigits[0])));
+ ctype->mbdigits_act -= 10;
+ }
+ }
+
+ /* If no input digits are given use the default. */
+ if (ctype->mbdigits_act == 0)
+ {
+ if (ctype->mbdigits_max == 0)
+ {
+ ctype->mbdigits = obstack_alloc (&charmap->mem_pool,
+ 10 * sizeof (struct charseq *));
+ ctype->mbdigits_max = 10;
+ }
+
+ for (cnt = 0; cnt < 10; ++cnt)
+ {
+ ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
+ digits + cnt, 1);
+ if (ctype->mbdigits[cnt] == NULL)
+ {
+ ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
+ longnames[cnt],
+ strlen (longnames[cnt]));
+ if (ctype->mbdigits[cnt] == NULL)
+ {
+ /* Hum, this ain't good. */
+ error (0, 0, _("\
+no input digits defined and none of the standard names in the charmap"));
+
+ ctype->mbdigits[cnt] = obstack_alloc (&charmap->mem_pool,
+ sizeof (struct charseq) + 1);
+
+ /* This is better than nothing. */
+ ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
+ ctype->mbdigits[cnt]->nbytes = 1;
+ }
+ }
+ }
+
+ ctype->mbdigits_act = 10;
+ }
+
+ /* Check the wide character input digits. There must be a multiple
+ of ten available. In each group I could be that one or the other
+ character is missing. In this case the whole group must be
+ removed. */
+ cnt = 0;
+ while (cnt < ctype->wcdigits_act)
+ {
+ size_t inner;
+ for (inner = 0; inner < 10; ++inner)
+ if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
+ break;
+
+ if (inner == 10)
+ cnt += 10;
+ else
+ {
+ /* Remove the group. */
+ memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
+ ((ctype->wcdigits_act - cnt - 10)
+ * sizeof (ctype->wcdigits[0])));
+ ctype->wcdigits_act -= 10;
+ }
+ }
+
+ /* If no input digits are given use the default. */
+ if (ctype->wcdigits_act == 0)
+ {
+ if (ctype->wcdigits_max == 0)
+ {
+ ctype->wcdigits = obstack_alloc (&charmap->mem_pool,
+ 10 * sizeof (uint32_t));
+ ctype->wcdigits_max = 10;
+ }
+
+ for (cnt = 0; cnt < 10; ++cnt)
+ ctype->wcdigits[cnt] = L'0' + cnt;
+
+ ctype->mbdigits_act = 10;
+ }
+
+ /* Check the outdigits. */
+ warned = 0;
+ for (cnt = 0; cnt < 10; ++cnt)
+ if (ctype->mboutdigits[cnt] == NULL)
+ {
+ static struct charseq replace[2];
+
+ if (!warned)
+ {
+ error (0, 0, _("\
+not all characters used in `outdigit' are available in the charmap"));
+ warned = 1;
+ }
+
+ replace[0].nbytes = 1;
+ replace[0].bytes[0] = '?';
+ replace[0].bytes[1] = '\0';
+ ctype->mboutdigits[cnt] = &replace[0];
+ }
+
+ warned = 0;
+ for (cnt = 0; cnt < 10; ++cnt)
+ if (ctype->wcoutdigits[cnt] == 0)
+ {
+ if (!warned)
+ {
+ error (0, 0, _("\
+not all characters used in `outdigit' are available in the repertoire"));
+ warned = 1;
+ }
+
+ ctype->wcoutdigits[cnt] = L'?';
}
}
void
-ctype_output (struct localedef_t *locale, struct charset_t *charset,
+ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
const char *output_path)
{
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
@@ -370,23 +674,12 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
struct iovec iov[2 + nelems + ctype->nr_charclass
+ ctype->map_collection_nr];
struct locale_file data;
- u_int32_t idx[nelems];
+ uint32_t idx[nelems + 1];
size_t elem, cnt, offset, total;
-
-
- if ((locale->binary & (1 << LC_CTYPE)) != 0)
- {
- iov[0].iov_base = ctype;
- iov[0].iov_len = locale->len[LC_CTYPE];
-
- write_locale_data (output_path, "LC_CTYPE", 1, iov);
-
- return;
- }
-
+ char *cp;
/* Now prepare the output: Find the sizes of the table we can use. */
- allocate_arrays (ctype, charset);
+ allocate_arrays (ctype, charmap, ctype->repertoire);
data.magic = LIMAGIC (LC_CTYPE);
data.n = nelems;
@@ -419,20 +712,20 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
CTYPE_DATA (_NL_CTYPE_TOUPPER_EB,
ctype->map_eb[0],
(ctype->plane_size * ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
+ * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TOLOWER_EB,
ctype->map_eb[1],
(ctype->plane_size * ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
+ * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TOUPPER_EL,
ctype->map_el[0],
(ctype->plane_size * ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
+ * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TOLOWER_EL,
ctype->map_el[1],
(ctype->plane_size * ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
+ * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_CLASS32,
ctype->ctype32_b,
@@ -441,15 +734,88 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
CTYPE_DATA (_NL_CTYPE_NAMES_EB,
ctype->names_eb, (ctype->plane_size * ctype->plane_cnt
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
CTYPE_DATA (_NL_CTYPE_NAMES_EL,
ctype->names_el, (ctype->plane_size * ctype->plane_cnt
- * sizeof (u_int32_t)));
-
- CTYPE_DATA (_NL_CTYPE_HASH_SIZE,
- &ctype->plane_size, sizeof (u_int32_t));
- CTYPE_DATA (_NL_CTYPE_HASH_LAYERS,
- &ctype->plane_cnt, sizeof (u_int32_t));
+ * sizeof (uint32_t)));
+
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EB,
+ &ctype->translit_hash_size_eb, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE_EL,
+ &ctype->translit_hash_size_el, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EB,
+ &ctype->translit_hash_layers_eb, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS_EL,
+ &ctype->translit_hash_layers_el, sizeof (uint32_t));
+
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EB,
+ ctype->translit_from_idx_eb,
+ ctype->translit_idx_size);
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX_EL,
+ ctype->translit_from_idx_el,
+ ctype->translit_idx_size);
+
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EB,
+ ctype->translit_from_tbl_eb,
+ ctype->translit_from_tbl_size);
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL_EL,
+ ctype->translit_from_tbl_el,
+ ctype->translit_from_tbl_size);
+
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EB,
+ ctype->translit_to_idx_eb,
+ ctype->translit_idx_size);
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX_EL,
+ ctype->translit_to_idx_el,
+ ctype->translit_idx_size);
+
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EB,
+ ctype->translit_to_tbl_eb, ctype->translit_to_tbl_size);
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL_EL,
+ ctype->translit_to_tbl_el, ctype->translit_to_tbl_size);
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+ CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EB,
+ &ctype->plane_size, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EB,
+ &ctype->plane_cnt, sizeof (uint32_t));
+#else
+ case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EB):
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (sizeof (uint32_t));
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->plane_size);
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ break;
+ case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EB):
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (sizeof (uint32_t));
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->plane_cnt);
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ break;
+#endif
+#if __BYTE_ORDER == __BIG_ENDIAN
+ CTYPE_DATA (_NL_CTYPE_HASH_SIZE_EL,
+ &ctype->plane_size, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_HASH_LAYERS_EL,
+ &ctype->plane_cnt, sizeof (uint32_t));
+#else
+ case _NL_ITEM_INDEX (_NL_CTYPE_HASH_SIZE_EL):
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (sizeof (uint32_t));
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->plane_size);
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ break;
+ case _NL_ITEM_INDEX (_NL_CTYPE_HASH_LAYERS_EL):
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (sizeof (uint32_t));
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->plane_cnt);
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ break;
+#endif
case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
/* The class name array. */
@@ -466,8 +832,7 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
total += 1 + (4 - ((total + 1) % 4));
- if (elem + 1 < nelems)
- idx[elem + 1] = idx[elem] + total;
+ idx[elem + 1] = idx[elem] + total;
break;
case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
@@ -485,15 +850,14 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
iov[2 + elem + offset].iov_len = 1 + (4 - ((total + 1) % 4));
total += 1 + (4 - ((total + 1) % 4));
- if (elem + 1 < nelems)
- idx[elem + 1] = idx[elem] + total;
+ idx[elem + 1] = idx[elem] + total;
break;
CTYPE_DATA (_NL_CTYPE_WIDTH,
ctype->width, ctype->plane_size * ctype->plane_cnt);
CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
- &ctype->mb_cur_max, sizeof (u_int32_t));
+ &ctype->mb_cur_max, sizeof (uint32_t));
case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
total = strlen (ctype->codeset_name) + 1;
@@ -508,8 +872,127 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
total = (total + 3) & ~3;
}
iov[2 + elem + offset].iov_len = total;
- if (elem + 1 < nelems)
- idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
+ idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB):
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL):
+ iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EB)
+ && __BYTE_ORDER == __BIG_ENDIAN)
+ || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN_EL)
+ && __BYTE_ORDER == __LITTLE_ENDIAN))
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ ctype->mbdigits_act / 10;
+ else
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->mbdigits_act / 10);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB):
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL):
+ iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ if ((elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EB)
+ && __BYTE_ORDER == __BIG_ENDIAN)
+ || (elem == _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN_EL)
+ && __BYTE_ORDER == __LITTLE_ENDIAN))
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ ctype->wcdigits_act / 10;
+ else
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->wcdigits_act / 10);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
+ /* Compute the length of all possible characters. For INDIGITS
+ there might be more than one. We simply concatenate all of
+ them with a NUL byte following. The NUL byte wouldn't be
+ necessary but it makes it easier for the user. */
+ total = 0;
+ for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
+ cnt < ctype->mbdigits_act; cnt += 10)
+ total += ctype->mbdigits[cnt]->nbytes + 1;
+ iov[2 + elem + offset].iov_base = (char *) alloca (total);
+ iov[2 + elem + offset].iov_len = total;
+
+ cp = iov[2 + elem + offset].iov_base;
+ for (cnt = elem - _NL_CTYPE_INDIGITS0_MB;
+ cnt < ctype->mbdigits_act; cnt += 10)
+ {
+ cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
+ ctype->mbdigits[cnt]->nbytes);
+ *cp++ = '\0';
+ }
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
+ /* Unlike INDIGITS there is exactly one character per OUTDIGIT
+ entry. We store its bytes with a NUL byte following. The NUL
+ byte wouldn't be necessary but it makes it easier for the user.
+ NOTE(review): the length is taken from `mboutdigits' but the bytes
+ are copied from `mbdigits' below; confirm which array is meant. */
+ cnt = elem - _NL_CTYPE_OUTDIGIT0_MB;
+ total = ctype->mboutdigits[cnt]->nbytes + 1;
+ iov[2 + elem + offset].iov_base = (char *) alloca (total);
+ iov[2 + elem + offset].iov_len = total;
+
+ *(char *) mempcpy (iov[2 + elem + offset].iov_base,
+ ctype->mbdigits[cnt]->bytes,
+ ctype->mbdigits[cnt]->nbytes) = '\0';
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EB):
+ total = ctype->wcdigits_act / 10;
+
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (total * sizeof (uint32_t));
+ iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
+
+ for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EB;
+ cnt < ctype->wcdigits_act; cnt += 10)
+ ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
+ = (__BYTE_ORDER == __LITTLE_ENDIAN
+ ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]);
+ break;
+
+ case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC_EL):
+ total = ctype->wcdigits_act / 10;
+
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (total * sizeof (uint32_t));
+ iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
+
+ for (cnt = elem - _NL_CTYPE_INDIGITS0_WC_EL;
+ cnt < ctype->wcdigits_act; cnt += 10)
+ ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
+ = (__BYTE_ORDER == __BIG_ENDIAN
+ ? bswap_32 (ctype->wcdigits[cnt]) : ctype->wcdigits[cnt]);
+ break;
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB):
+ cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB;
+#else
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL):
+ cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL;
+#endif
+ iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
+ break;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EB):
+ cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EB;
+#else
+ case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC_EL) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC_EL):
+ cnt = elem - _NL_CTYPE_OUTDIGIT0_WC_EL;
+#endif
+ iov[2 + elem + offset].iov_base =
+ (uint32_t *) alloca (sizeof (uint32_t));
+ *(uint32_t *) iov[2 + elem + offset].iov_base =
+ bswap_32 (ctype->wcoutdigits[cnt]);
+ iov[2 + elem + offset].iov_len = sizeof (uint32_t);
break;
default:
@@ -527,10 +1010,9 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
iov[2 + elem + offset].iov_len = ((ctype->plane_size
* ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
+ * sizeof (uint32_t));
- if (elem + 1 < nelems)
- idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
+ idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
}
}
@@ -541,596 +1023,1575 @@ ctype_output (struct localedef_t *locale, struct charset_t *charset,
}
-/* Character class handling. */
-void
-ctype_class_new (struct linereader *lr, struct localedef_t *locale,
- enum token_t tok, struct token *code,
- struct charset_t *charset)
+/* Local functions. */
+/* Register NAME as a new character class of CTYPE. If a class with
+ this name already exists an error is reported through LR and CTYPE
+ is left unchanged. NAME is stored by reference, it is not copied. */
+static void
+ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
+ const char *name)
{
- ctype_class_newP (lr, locale->categories[LC_CTYPE].ctype,
- code->val.str.start);
+ size_t cnt;
+
+ /* Look for an already registered class with the same name. */
+ for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+ if (strcmp (ctype->classnames[cnt], name) == 0)
+ break;
+
+ /* The loop stopped early, i.e., the name is already in use. */
+ if (cnt < ctype->nr_charclass)
+ {
+ lr_error (lr, _("character class `%s' already defined"), name);
+ return;
+ }
+
+ /* The table of class names is full. */
+ if (ctype->nr_charclass == MAX_NR_CHARCLASS)
+ /* Exit code 2 is prescribed in P1003.2b. */
+ error (2, 0, _("\
+implementation limit: no more than %d character classes allowed"),
+ MAX_NR_CHARCLASS);
+
+ /* Append the new name. */
+ ctype->classnames[ctype->nr_charclass++] = name;
}
-int
-ctype_is_charclass (struct linereader *lr, struct localedef_t *locale,
- const char *name)
+/* Register NAME as a new character map of CTYPE and allocate its
+ zero-filled mapping table. If a map with this name already exists
+ an error is reported through LR and CTYPE is left unchanged. NAME
+ is stored by reference, it is not copied. CHARMAP is only consulted
+ (mb_cur_max) to pick the initial table size for the first map. */
+static void
+ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
+ const char *name, struct charmap_t *charmap)
{
+ size_t max_chars = 0;
size_t cnt;
- for (cnt = 0; cnt < locale->categories[LC_CTYPE].ctype->nr_charclass; ++cnt)
- if (strcmp (name, locale->categories[LC_CTYPE].ctype->classnames[cnt])
- == 0)
- return 1;
+ /* Look for a map with the same name and on the way remember the
+ largest capacity of the maps seen so far. */
+ for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
+ {
+ if (strcmp (ctype->mapnames[cnt], name) == 0)
+ break;
- return 0;
+ if (max_chars < ctype->map_collection_max[cnt])
+ max_chars = ctype->map_collection_max[cnt];
+ }
+
+ /* The loop stopped early, i.e., the name is already in use. */
+ if (cnt < ctype->map_collection_nr)
+ {
+ lr_error (lr, _("character map `%s' already defined"), name);
+ return;
+ }
+
+ if (ctype->map_collection_nr == MAX_NR_CHARMAP)
+ /* Exit code 2 is prescribed in P1003.2b. */
+ error (2, 0, _("\
+implementation limit: no more than %d character maps allowed"),
+ MAX_NR_CHARMAP);
+
+ /* Here CNT == map_collection_nr, the index of the new map. */
+ ctype->mapnames[cnt] = name;
+
+ /* Without any existing map use a default capacity, otherwise use
+ the largest capacity found above. */
+ if (max_chars == 0)
+ ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
+ else
+ ctype->map_collection_max[cnt] = max_chars;
+
+ ctype->map_collection[cnt] = (uint32_t *)
+ xmalloc (sizeof (uint32_t) * ctype->map_collection_max[cnt]);
+ memset (ctype->map_collection[cnt], '\0',
+ sizeof (uint32_t) * ctype->map_collection_max[cnt]);
+ /* The first 256 slots are addressed directly (see find_idx) and
+ therefore count as used from the beginning. */
+ ctype->map_collection_act[cnt] = 256;
+
+ ++ctype->map_collection_nr;
}
-void
-ctype_class_start (struct linereader *lr, struct localedef_t *locale,
- enum token_t tok, const char *str,
- struct charset_t *charset)
+/* Return a pointer to the slot for the character IDX in *TABLE.
+ Values below 256 index the table directly. All other values are
+ looked up in CTYPE->charnames (starting at position 256) and are
+ appended there if they are missing; the position in this name array
+ is the index into *TABLE. *TABLE is enlarged (new part zeroed) and
+ *MAX and *ACT are updated if necessary.
+
+ We have to be prepared that TABLE, MAX, and ACT can be NULL. This
+ is possible if we only want to extend the name array. In this case
+ NULL is returned. */
+static uint32_t *
+find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
+ size_t *act, uint32_t idx)
{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
size_t cnt;
- switch (tok)
- {
- case tok_upper:
- str = "upper";
- break;
- case tok_lower:
- str = "lower";
- break;
- case tok_alpha:
- str = "alpha";
- break;
- case tok_digit:
- str = "digit";
- break;
- case tok_xdigit:
- str = "xdigit";
- break;
- case tok_space:
- str = "space";
- break;
- case tok_print:
- str = "print";
- break;
- case tok_graph:
- str = "graph";
- break;
- case tok_blank:
- str = "blank";
- break;
- case tok_cntrl:
- str = "cntrl";
- break;
- case tok_punct:
- str = "punct";
- break;
- case tok_alnum:
- str = "alnum";
- break;
- case tok_ident:
+ if (idx < 256)
+ return table == NULL ? NULL : &(*table)[idx];
+
+ for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
+ if (ctype->charnames[cnt] == idx)
break;
- default:
- assert (! "illegal token as class name: should not happen");
+
+ /* We have to distinguish two cases: the name is found or not. */
+ if (cnt == ctype->charnames_act)
+ {
+ /* Extend the name array. */
+ if (ctype->charnames_act == ctype->charnames_max)
+ {
+ ctype->charnames_max *= 2;
+ ctype->charnames = (unsigned int *)
+ xrealloc (ctype->charnames,
+ sizeof (unsigned int) * ctype->charnames_max);
+ }
+ ctype->charnames[ctype->charnames_act++] = idx;
}
- for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
- if (strcmp (str, ctype->classnames[cnt]) == 0)
- break;
+ if (table == NULL)
+ /* We have done everything we are asked to do. */
+ return NULL;
+
+ if (cnt >= *act)
+ {
+ if (cnt >= *max)
+ {
+ size_t old_max = *max;
+ do
+ *max *= 2;
+ while (*max <= cnt);
- if (cnt >= ctype->nr_charclass)
- assert (! "unknown class in class definition: should not happen");
+ /* The table consists of `uint32_t' elements; use the same
+ element size as the memset below. */
+ *table =
+ (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
+ memset (&(*table)[old_max], '\0',
+ (*max - old_max) * sizeof (uint32_t));
+ }
- ctype->class_done |= BIT (tok);
+ *act = cnt;
+ }
- ctype->current_class_mask = 1 << cnt;
- ctype->last_class_char = ILLEGAL_CHAR_VALUE;
+ return &(*table)[cnt];
}
-void
-ctype_class_from (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
+/* Determine for the token NOW the byte sequence in CHARMAP (stored in
+ *SEQP, NULL if there is none) and the UCS4 value (stored in *WCHP).
+ Tokens of type tok_bsymbol, tok_ucs4, and tok_charcode are handled
+ and 0 is returned. For every other token 1 is returned and *SEQP
+ and *WCHP are not written. */
+static int
+get_character (struct token *now, struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct charseq **seqp, uint32_t *wchp)
{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
- unsigned int value;
+ if (now->tok == tok_bsymbol)
+ {
+ /* This will hopefully be the normal case. The symbolic name is
+ looked up in both tables independently. */
+ *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
+ now->val.str.lenmb);
+ *seqp = charmap_find_value (charmap, now->val.str.startmb,
+ now->val.str.lenmb);
+ }
+ else if (now->tok == tok_ucs4)
+ {
+ /* First see whether a sequence is already recorded for this
+ UCS4 value. */
+ *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
+ if (*seqp == NULL)
+ {
+ /* Compute the value in the charmap from the UCS value. */
+ const char *symbol = repertoire_find_symbol (repertoire,
+ now->val.ucs4);
- ctype->last_class_char = value;
+ if (symbol == NULL)
+ *seqp = NULL;
+ else
+ *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
- /* In the LC_CTYPE category it is no error when a character is
- not found. This has to be ignored silently. */
- return;
+ if (*seqp == NULL)
+ {
+ /* Insert a negative entry. It is keyed by the UCS4 value
+ but carries ILLEGAL_CHAR_VALUE, so the comparison in
+ the `else if' branch below rejects it when it is
+ found again. */
+ static const struct charseq negative
+ = { .ucs4 = ILLEGAL_CHAR_VALUE };
+ uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
+ *newp = now->val.ucs4;
+
+ insert_entry (&repertoire->seq_table, newp, 4,
+ (void *) &negative);
+ }
+ else
+ /* Remember the UCS4 value in the charmap entry. */
+ (*seqp)->ucs4 = now->val.ucs4;
+ }
+ else if ((*seqp)->ucs4 != now->val.ucs4)
+ /* The recorded entry does not belong to this UCS4 value, e.g.,
+ it is a negative entry. */
+ *seqp = NULL;
+
+ *wchp = now->val.ucs4;
+ }
+ else if (now->tok == tok_charcode)
+ {
+ /* We must map from the byte code to UCS4. */
+ *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
+ now->val.str.lenmb);
- *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
- &ctype->class_collection_act, value)
- |= ctype->current_class_mask;
+ if (*seqp == NULL)
+ *wchp = ILLEGAL_CHAR_VALUE;
+ else
+ {
+ /* Look up the UCS4 value by name only once and cache it in
+ the charmap entry. */
+ if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
+ strlen ((*seqp)->name));
+ *wchp = (*seqp)->ucs4;
+ }
+ }
+ else
+ return 1;
+
+ return 0;
}
-void
-ctype_class_to (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
+/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>'.
+ LAST_STR is the symbolic name preceding the ellipsis, NOW the token
+ with the name following it. Both names must have the same length;
+ the parts following the common prefix are parsed as numbers in BASE
+ and give the range FROM - TO. For every name after LAST_STR up to
+ and including the name in NOW the character is looked up and, unless
+ IGNORE_CONTENT is set, CLASS256_BIT and CLASS_BIT are recorded.
+ HANDLE_DIGITS == 1 additionally appends the characters to the
+ `digit' arrays of CTYPE, HANDLE_DIGITS == 2 to the `outdigit'
+ arrays. Errors are reported through LDFILE.
+ NOTE(review): NOW is presumably a tok_bsymbol token; the return
+ value of get_character is not checked. On return NOW->val.str.startmb
+ points to a local buffer of this function. */
+static void
+charclass_symbolic_ellipsis (struct linereader *ldfile,
+ struct locale_ctype_t *ctype,
+ struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct token *now,
+ const char *last_str,
+ unsigned long int class256_bit,
+ unsigned long int class_bit, int base,
+ int ignore_content, int handle_digits)
{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
- unsigned int value, cnt;
+ const char *nowstr = now->val.str.startmb;
+ char tmp[now->val.str.lenmb + 1];
+ const char *cp;
+ char *endp;
+ unsigned long int from;
+ unsigned long int to;
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
+ /* We have to compute the ellipsis values using the symbolic names. */
+ assert (last_str != NULL);
- /* In the LC_CTYPE category it is no error when a character is
- not found. This has to be ignored silently. */
- if ((wchar_t) ctype->last_class_char != ILLEGAL_CHAR_VALUE
- && (wchar_t) value != ILLEGAL_CHAR_VALUE)
- for (cnt = ctype->last_class_char + 1; cnt <= value; ++cnt)
- *find_idx (ctype, &ctype->class_collection, &ctype->class_collection_max,
- &ctype->class_collection_act, cnt)
- |= ctype->current_class_mask;
+ if (strlen (last_str) != now->val.str.lenmb)
+ {
+ invalid_range:
+ lr_error (ldfile,
+ _("`%s' and `%s' are no valid names for symbolic range"),
+ last_str, nowstr);
+ return;
+ }
- ctype->last_class_char = ILLEGAL_CHAR_VALUE;
-}
+ if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
+ /* Nothing to do, the names are the same. */
+ return;
+ /* Skip the common prefix. The names differ, so this terminates. */
+ for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
+ ;
-void
-ctype_class_end (struct linereader *lr, struct localedef_t *locale)
-{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+ /* `strtoul' signals overflow by returning ULONG_MAX and setting
+ errno to ERANGE. */
+ errno = 0;
+ from = strtoul (cp, &endp, base);
+ if ((from == ULONG_MAX && errno == ERANGE) || *endp != '\0')
+ goto invalid_range;
- /* We have no special actions to perform here. */
- ctype->current_class_mask = 0;
- ctype->last_class_char = ILLEGAL_CHAR_VALUE;
-}
+ to = strtoul (nowstr + (cp - last_str), &endp, base);
+ if ((to == ULONG_MAX && errno == ERANGE) || *endp != '\0' || from >= to)
+ goto invalid_range;
+ /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
+ if (!ignore_content)
+ {
+ now->val.str.startmb = tmp;
+ while (++from <= to)
+ {
+ struct charseq *seq;
+ uint32_t wch;
-/* Character map handling. */
-void
-ctype_map_new (struct linereader *lr, struct localedef_t *locale,
- enum token_t tok, struct token *code,
- struct charset_t *charset)
-{
- ctype_map_newP (lr, locale->categories[LC_CTYPE].ctype,
- code->val.str.start, charset);
-}
+ /* Common prefix followed by the zero-padded number. The `*'
+ arguments must have type `int' and FROM is an `unsigned
+ long int'. */
+ sprintf (tmp, (base == 10 ? "%.*s%0*lu" : "%.*s%0*lX"),
+ (int) (cp - last_str), last_str,
+ (int) (now->val.str.lenmb - (cp - last_str)), from);
+ get_character (now, charmap, repertoire, &seq, &wch);
-int
-ctype_is_charconv (struct linereader *lr, struct localedef_t *locale,
- const char *name)
-{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
- size_t cnt;
+ if (seq != NULL && seq->nbytes == 1)
+ /* Yep, we can store information about this byte sequence. */
+ ctype->class256_collection[seq->bytes[0]] |= class256_bit;
- for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
- if (strcmp (name, ctype->mapnames[cnt]) == 0)
- return 1;
+ if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
+ /* We have the UCS4 position. */
+ *find_idx (ctype, &ctype->class_collection,
+ &ctype->class_collection_max,
+ &ctype->class_collection_act, wch) |= class_bit;
- return 0;
+ if (handle_digits == 1)
+ {
+ /* We must store the digit values. Both arrays are
+ enlarged together. */
+ if (ctype->mbdigits_act == ctype->mbdigits_max)
+ {
+ ctype->mbdigits_max *= 2;
+ ctype->mbdigits = xrealloc (ctype->mbdigits,
+ (ctype->mbdigits_max
+ * sizeof (char *)));
+ ctype->wcdigits_max *= 2;
+ ctype->wcdigits = xrealloc (ctype->wcdigits,
+ (ctype->wcdigits_max
+ * sizeof (uint32_t)));
+ }
+
+ ctype->mbdigits[ctype->mbdigits_act++] = seq;
+ ctype->wcdigits[ctype->wcdigits_act++] = wch;
+ }
+ else if (handle_digits == 2)
+ {
+ /* We must store the digit values. */
+ if (ctype->outdigits_act >= 10)
+ {
+ lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+ "LC_CTYPE", "outdigit");
+ return;
+ }
+
+ ctype->mboutdigits[ctype->outdigits_act] = seq;
+ ctype->wcoutdigits[ctype->outdigits_act] = wch;
+ ++ctype->outdigits_act;
+ }
+ }
+ }
}
-void
-ctype_map_start (struct linereader *lr, struct localedef_t *locale,
- enum token_t tok, const char *name, struct charset_t *charset)
+/* Ellipsis like in `<U1234>..<U2345>'. LAST_WCH is the UCS4 value
+ preceding the ellipsis, NOW the tok_ucs4 token following it. For
+ every value after LAST_WCH up to and including NOW->val.ucs4 the
+ byte sequence is determined and, unless IGNORE_CONTENT is set,
+ CLASS256_BIT and CLASS_BIT are recorded. HANDLE_DIGITS == 1
+ additionally appends the characters to the `digit' arrays of CTYPE,
+ HANDLE_DIGITS == 2 to the `outdigit' arrays. Errors are reported
+ through LDFILE. */
+static void
+charclass_ucs4_ellipsis (struct linereader *ldfile,
+ struct locale_ctype_t *ctype,
+ struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct token *now, uint32_t last_wch,
+ unsigned long int class256_bit,
+ unsigned long int class_bit, int ignore_content,
+ int handle_digits)
{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
- size_t cnt;
-
- switch (tok)
+ if (last_wch > now->val.ucs4)
{
- case tok_toupper:
- ctype->toupper_done = 1;
- name = "toupper";
- break;
- case tok_tolower:
- ctype->tolower_done = 1;
- name = "tolower";
- break;
- case tok_ident:
- break;
- default:
- assert (! "unknown token in category `LC_CTYPE' should not happen");
+ lr_error (ldfile, _("\
+to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
+ (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
+ (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
+ return;
}
- for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
- if (strcmp (name, ctype->mapnames[cnt]) == 0)
- break;
+ if (!ignore_content)
+ while (++last_wch <= now->val.ucs4)
+ {
+ /* We have to find out whether there is a byte sequence corresponding
+ to this UCS4 value. */
+ struct charseq *seq = repertoire_find_seq (repertoire, last_wch);
- if (cnt == ctype->map_collection_nr)
- assert (! "unknown token in category `LC_CTYPE' should not happen");
+ /* If this is the first time we look for this sequence create a new
+ entry. */
+ if (seq == NULL)
+ {
+ /* Find the symbolic name for this UCS4 value. */
+ const char *symbol = repertoire_find_symbol (repertoire, last_wch);
+ uint32_t *newp = obstack_alloc (&repertoire->mem_pool, 4);
+ *newp = last_wch;
- ctype->last_map_idx = cnt;
- ctype->from_map_char = ILLEGAL_CHAR_VALUE;
-}
+ if (symbol != NULL)
+ /* We have a name, now search the multibyte value. */
+ seq = charmap_find_value (charmap, symbol, strlen (symbol));
+ if (seq == NULL)
+ {
+ /* We have to create a fake entry. */
+ static const struct charseq negative
+ = { .ucs4 = ILLEGAL_CHAR_VALUE };
+ seq = (struct charseq *) &negative;
+ }
+ else
+ seq->ucs4 = last_wch;
-void
-ctype_map_from (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
-{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
- unsigned int value;
+ insert_entry (&repertoire->seq_table, newp, 4, seq);
+ }
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
+ /* SEQ is never NULL here. Only a real entry (not the fake one)
+ consisting of a single byte is recorded in the 8-bit table. */
+ if (seq->ucs4 == last_wch && seq->nbytes == 1)
+ /* Yep, we can store information about this byte sequence. */
+ ctype->class256_collection[(size_t) seq->bytes[0]]
+ |= class256_bit;
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
- /* In the LC_CTYPE category it is no error when a character is
- not found. This has to be ignored silently. */
- return;
+ /* And of course we have the UCS4 position. */
+ if (class_bit != 0)
+ *find_idx (ctype, &ctype->class_collection,
+ &ctype->class_collection_max,
+ &ctype->class_collection_act, last_wch) |= class_bit;
- assert (ctype->last_map_idx < ctype->map_collection_nr);
+ if (handle_digits == 1)
+ {
+ /* We must store the digit values. Both arrays are enlarged
+ together. */
+ if (ctype->mbdigits_act == ctype->mbdigits_max)
+ {
+ ctype->mbdigits_max *= 2;
+ ctype->mbdigits = xrealloc (ctype->mbdigits,
+ (ctype->mbdigits_max
+ * sizeof (char *)));
+ ctype->wcdigits_max *= 2;
+ ctype->wcdigits = xrealloc (ctype->wcdigits,
+ (ctype->wcdigits_max
+ * sizeof (uint32_t)));
+ }
+
+ ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
+ ? seq : NULL);
+ ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
+ }
+ else if (handle_digits == 2)
+ {
+ /* We must store the digit values. */
+ if (ctype->outdigits_act >= 10)
+ {
+ lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+ "LC_CTYPE", "outdigit");
+ return;
+ }
- ctype->from_map_char = value;
+ ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
+ ? seq : NULL);
+ ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
+ ++ctype->outdigits_act;
+ }
+ }
}
-void
-ctype_map_to (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
+/* Ellipsis as in `/xea/x12.../xea/x34'. LAST_CHARCODE (with
+ LAST_CHARCODE_LEN bytes) is the byte sequence preceding the
+ ellipsis, NOW the tok_charcode token following it. LAST_CHARCODE is
+ incremented in place until it is equal to the sequence in NOW. For
+ every sequence on the way, unless IGNORE_CONTENT is set,
+ CLASS256_BIT and CLASS_BIT are recorded. HANDLE_DIGITS == 1
+ additionally appends the characters to the `digit' arrays of CTYPE,
+ HANDLE_DIGITS == 2 to the `outdigit' arrays. Errors are reported
+ through LDFILE. */
+static void
+charclass_charcode_ellipsis (struct linereader *ldfile,
+ struct locale_ctype_t *ctype,
+ struct charmap_t *charmap,
+ struct repertoire_t *repertoire,
+ struct token *now, char *last_charcode,
+ uint32_t last_charcode_len,
+ unsigned long int class256_bit,
+ unsigned long int class_bit, int ignore_content,
+ int handle_digits)
{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
- unsigned int value;
-
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
+ /* First check whether the to-value is larger. */
+ if (now->val.charcode.nbytes != last_charcode_len)
+ {
+ lr_error (ldfile, _("\
+start and end character sequence of range must have the same length"));
+ return;
+ }
- if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
- || (wchar_t) value == ILLEGAL_CHAR_VALUE)
+ if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
{
- /* In the LC_CTYPE category it is no error when a character is
- not found. This has to be ignored silently. */
- ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+ lr_error (ldfile, _("\
+to-value character sequence is smaller than from-value sequence"));
return;
}
- *find_idx (ctype, &ctype->map_collection[ctype->last_map_idx],
- &ctype->map_collection_max[ctype->last_map_idx],
- &ctype->map_collection_act[ctype->last_map_idx],
- ctype->from_map_char) = value;
+ if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) == 0)
+ /* Nothing to do, the sequences are the same. Without this test
+ the loop below, which increments before it compares, would run
+ until the byte sequence wraps around. */
+ return;
+
+ if (!ignore_content)
+ {
+ do
+ {
+ /* Increment the byte sequence value. */
+ struct charseq *seq;
+ uint32_t wch;
+ int i;
+
+ /* Add one to the last byte and propagate the carry. */
+ for (i = last_charcode_len - 1; i >= 0; --i)
+ if (++last_charcode[i] != 0)
+ break;
+
+ if (last_charcode_len == 1)
+ /* Of course we have the charcode value. Convert through
+ `unsigned char' so that bytes >= 0x80 do not produce a
+ wild index if `char' is signed. */
+ ctype->class256_collection[(unsigned char) last_charcode[0]]
+ |= class256_bit;
+
+ /* Find the symbolic name. */
+ seq = charmap_find_symbol (charmap, last_charcode,
+ last_charcode_len);
+ if (seq != NULL)
+ {
+ if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ seq->ucs4 = repertoire_find_value (repertoire, seq->name,
+ strlen (seq->name));
+ wch = seq->ucs4;
+
+ if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
+ *find_idx (ctype, &ctype->class_collection,
+ &ctype->class_collection_max,
+ &ctype->class_collection_act, wch) |= class_bit;
+ }
+ else
+ wch = ILLEGAL_CHAR_VALUE;
- ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+ if (handle_digits == 1)
+ {
+ /* We must store the digit values. Both arrays are
+ enlarged together. */
+ if (ctype->mbdigits_act == ctype->mbdigits_max)
+ {
+ ctype->mbdigits_max *= 2;
+ ctype->mbdigits = xrealloc (ctype->mbdigits,
+ (ctype->mbdigits_max
+ * sizeof (char *)));
+ ctype->wcdigits_max *= 2;
+ ctype->wcdigits = xrealloc (ctype->wcdigits,
+ (ctype->wcdigits_max
+ * sizeof (uint32_t)));
+ }
+
+ /* Create a new entry which holds only the byte sequence;
+ the other members are not set. */
+ seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
+ memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
+ seq->nbytes = last_charcode_len;
+
+ ctype->mbdigits[ctype->mbdigits_act++] = seq;
+ ctype->wcdigits[ctype->wcdigits_act++] = wch;
+ }
+ else if (handle_digits == 2)
+ {
+ /* We must store the digit values. */
+ if (ctype->outdigits_act >= 10)
+ {
+ lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+ "LC_CTYPE", "outdigit");
+ return;
+ }
+
+ /* Create a new entry which holds only the byte sequence;
+ the other members are not set. */
+ seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
+ memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
+ seq->nbytes = last_charcode_len;
+
+ ctype->mboutdigits[ctype->outdigits_act] = seq;
+ ctype->wcoutdigits[ctype->outdigits_act] = wch;
+ ++ctype->outdigits_act;
+ }
+ }
+ while (memcmp (last_charcode, now->val.charcode.bytes,
+ last_charcode_len) != 0);
+ }
}
-void
-ctype_map_end (struct linereader *lr, struct localedef_t *locale)
+/* Convert the token NOW into a NUL-terminated UCS4 string. Handled
+ are tok_default_missing (result is the empty string), tok_bsymbol,
+ tok_ucs4, tok_charcode (each gives a string with one character),
+ and tok_string. NULL is returned if the UCS4 value cannot be
+ determined or the string of a tok_string token is empty. For any
+ other token a syntax error is reported through LDFILE and
+ (uint32_t *) -1l is returned. The result is either allocated with
+ xmalloc, a string literal, or the buffer of the token. */
+static uint32_t *
+read_widestring (struct linereader *ldfile, struct token *now,
+ struct charmap_t *charmap, struct repertoire_t *repertoire)
{
- struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
+ uint32_t *wstr;
+
+ if (now->tok == tok_default_missing)
+ /* The special name "" will denote this case. */
+ wstr = (uint32_t *) L"";
+ else if (now->tok == tok_bsymbol)
+ {
+ /* Get the value from the repertoire. Do this before the memory
+ is allocated so that nothing is lost if the lookup fails. */
+ uint32_t wch = repertoire_find_value (repertoire, now->val.str.startmb,
+ now->val.str.lenmb);
+ if (wch == ILLEGAL_CHAR_VALUE)
+ /* We cannot proceed, we don't know the UCS4 value. */
+ return NULL;
+
+ wstr = xmalloc (2 * sizeof (uint32_t));
+ wstr[0] = wch;
+ wstr[1] = 0;
+ }
+ else if (now->tok == tok_ucs4)
+ {
+ wstr = xmalloc (2 * sizeof (uint32_t));
+ wstr[0] = now->val.ucs4;
+ wstr[1] = 0;
+ }
+ else if (now->tok == tok_charcode)
+ {
+ /* Argh, we have to convert to the symbol name first and then to the
+ UCS4 value. */
+ struct charseq *seq = charmap_find_symbol (charmap,
+ now->val.str.startmb,
+ now->val.str.lenmb);
+ if (seq == NULL)
+ /* Cannot find the UCS4 value. */
+ return NULL;
+
+ if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ seq->ucs4 = repertoire_find_value (repertoire, seq->name,
+ strlen (seq->name));
+ if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
+ /* We cannot proceed, we don't know the UCS4 value. */
+ return NULL;
+
+ wstr = xmalloc (2 * sizeof (uint32_t));
+ wstr[0] = seq->ucs4;
+ wstr[1] = 0;
+ }
+ else if (now->tok == tok_string)
+ {
+ /* Use the wide character string of the token directly. */
+ wstr = now->val.str.startwc;
+ if (wstr[0] == 0)
+ return NULL;
+ }
+ else
+ {
+ if (now->tok != tok_eol && now->tok != tok_eof)
+ lr_ignore_rest (ldfile, 0);
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
+ return (uint32_t *) -1l;
+ }
- ctype->last_map_idx = MAX_NR_CHARMAP;
- ctype->from_map_char = ILLEGAL_CHAR_VALUE;
+ return wstr;
}
-/* Local functions. */
static void
-ctype_class_newP (struct linereader *lr, struct locale_ctype_t *ctype,
- const char *name)
+read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
+ struct token *now, struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
{
- size_t cnt;
+ uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
+ struct translit_t *result;
+ struct translit_to_t **top;
+ struct obstack *ob = &ctype->mem_pool;
+ int first;
+ int ignore;
+
+ if (from_wstr == NULL)
+ /* There is no valid from string. */
+ return;
- for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
- if (strcmp (ctype->classnames[cnt], name) == 0)
- break;
+ result = (struct translit_t *) obstack_alloc (ob,
+ sizeof (struct translit_t));
+ result->from = from_wstr;
+ result->next = NULL;
+ result->to = NULL;
+ top = &result->to;
+ first = 1;
+ ignore = 0;
- if (cnt < ctype->nr_charclass)
+ while (1)
{
- lr_error (lr, _("character class `%s' already defined"), name);
- return;
- }
+ uint32_t *to_wstr;
- if (ctype->nr_charclass == MAX_NR_CHARCLASS)
- /* Exit code 2 is prescribed in P1003.2b. */
- error (2, 0, _("\
-implementation limit: no more than %d character classes allowed"),
- MAX_NR_CHARCLASS);
+ /* Next we have one or more transliterations. They are
+ separated by semicolons. */
+ now = lr_token (ldfile, charmap, repertoire);
- ctype->classnames[ctype->nr_charclass++] = name;
+ if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
+ {
+ /* One string read. */
+ const uint32_t zero = 0;
+
+ if (!ignore)
+ {
+ obstack_grow (ob, &zero, 4);
+ to_wstr = obstack_finish (ob);
+
+ *top = obstack_alloc (ob, sizeof (struct translit_to_t));
+ (*top)->str = to_wstr;
+ (*top)->next = NULL;
+ }
+
+ if (now->tok == tok_eol)
+ {
+ result->next = ctype->translit;
+ ctype->translit = result;
+ return;
+ }
+
+ if (!ignore)
+ top = &(*top)->next;
+ ignore = 0;
+ }
+ else
+ {
+ to_wstr = read_widestring (ldfile, now, charmap, repertoire);
+ if (to_wstr == (uint32_t *) -1l)
+ {
+ /* An error occurred. */
+ obstack_free (ob, result);
+ return;
+ }
+
+ if (to_wstr == NULL)
+ ignore = 1;
+ else
+ /* This value is usable. */
+ obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
+
+ first = 0;
+ }
+ }
}
-static void
-ctype_map_newP (struct linereader *lr, struct locale_ctype_t *ctype,
- const char *name, struct charset_t *charset)
+/* The parser for the LC_CTYPE section of the locale definition. */
+void
+ctype_read (struct linereader *ldfile, struct localedef_t *result,
+ struct charmap_t *charmap, const char *repertoire_name,
+ int ignore_content)
{
- size_t max_chars = 0;
+ struct repertoire_t *repertoire = NULL;
+ struct locale_ctype_t *ctype;
+ struct token *now;
+ enum token_t nowtok;
size_t cnt;
+ struct charseq *last_seq;
+ uint32_t last_wch = 0;
+ enum token_t last_token;
+ enum token_t ellipsis_token;
+ char last_charcode[16];
+ size_t last_charcode_len = 0;
+ const char *last_str = NULL;
+ int mapidx;
- for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
- {
- if (strcmp (ctype->mapnames[cnt], name) == 0)
- break;
+ /* Get the repertoire we have to use. */
+ if (repertoire_name != NULL)
+ repertoire = repertoire_read (repertoire_name);
- if (max_chars < ctype->map_collection_max[cnt])
- max_chars = ctype->map_collection_max[cnt];
+ /* The rest of the line containing `LC_CTYPE' must be free. */
+ lr_ignore_rest (ldfile, 1);
+
+
+ do
+ {
+ now = lr_token (ldfile, charmap, NULL);
+ nowtok = now->tok;
}
+ while (nowtok == tok_eol);
- if (cnt < ctype->map_collection_nr)
+ /* If we see `copy' now we are almost done. */
+ if (nowtok == tok_copy)
{
- lr_error (lr, _("character map `%s' already defined"), name);
+ handle_copy (ldfile, charmap, repertoire, tok_lc_ctype, LC_CTYPE,
+ "LC_CTYPE", ignore_content);
return;
}
- if (ctype->map_collection_nr == MAX_NR_CHARMAP)
- /* Exit code 2 is prescribed in P1003.2b. */
- error (2, 0, _("\
-implementation limit: no more than %d character maps allowed"),
- MAX_NR_CHARMAP);
+ /* Prepare the data structures. */
+ ctype_startup (ldfile, result, charmap, ignore_content);
+ ctype = result->categories[LC_CTYPE].ctype;
- ctype->mapnames[cnt] = name;
+ /* Remember the repertoire we use. */
+ if (!ignore_content)
+ ctype->repertoire = repertoire;
- if (max_chars == 0)
- ctype->map_collection_max[cnt] = charset->mb_cur_max == 1 ? 256 : 512;
- else
- ctype->map_collection_max[cnt] = max_chars;
+ while (1)
+ {
+ unsigned long int class_bit = 0;
+ unsigned long int class256_bit = 0;
+ int handle_digits = 0;
- ctype->map_collection[cnt] = (u_int32_t *)
- xmalloc (sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
- memset (ctype->map_collection[cnt], '\0',
- sizeof (u_int32_t) * ctype->map_collection_max[cnt]);
- ctype->map_collection_act[cnt] = 256;
+ /* Of course we don't proceed beyond the end of file. */
+ if (nowtok == tok_eof)
+ break;
- ++ctype->map_collection_nr;
-}
+ /* Ignore empty lines. */
+ if (nowtok == tok_eol)
+ {
+ now = lr_token (ldfile, charmap, NULL);
+ nowtok = now->tok;
+ continue;
+ }
+ switch (nowtok)
+ {
+ case tok_class:
+ /* We simply forget the `class' keyword and use the following
+ operand to determine the bit. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok == tok_ident || now->tok == tok_string)
+ {
+ /* Must be one of the predefined class names. */
+ for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+ if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
+ break;
+ if (cnt >= ctype->nr_charclass)
+ {
+ if (now->val.str.lenmb == 8
+ && memcmp ("special1", now->val.str.startmb, 8) == 0)
+ class_bit = _ISwspecial1;
+ else if (now->val.str.lenmb == 8
+ && memcmp ("special2", now->val.str.startmb, 8) == 0)
+ class_bit = _ISwspecial2;
+ else if (now->val.str.lenmb == 8
+ && memcmp ("special3", now->val.str.startmb, 8) == 0)
+ class_bit = _ISwspecial3;
+ else
+ {
+ lr_error (ldfile, _("\
+unknown character class `%s' in category `LC_CTYPE'"),
+ now->val.str.startmb);
+ free (now->val.str.startmb);
+
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+ }
+ else
+ class_bit = _ISwbit (cnt);
+
+ free (now->val.str.startmb);
+ }
+ else if (now->tok == tok_digit)
+ goto handle_tok_digit;
+ else if (now->tok < tok_upper || now->tok > tok_blank)
+ goto err_label;
+ else
+ {
+ class_bit = BITw (now->tok);
+ class256_bit = BIT (now->tok);
+ }
-/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
- is possible if we only want to extend the name array. */
-static u_int32_t *
-find_idx (struct locale_ctype_t *ctype, u_int32_t **table, size_t *max,
- size_t *act, unsigned int idx)
-{
- size_t cnt;
+ /* The next character must be a semicolon. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok != tok_semicolon)
+ goto err_label;
+ goto read_charclass;
+
+ case tok_upper:
+ case tok_lower:
+ case tok_alpha:
+ case tok_alnum:
+ case tok_space:
+ case tok_cntrl:
+ case tok_punct:
+ case tok_graph:
+ case tok_print:
+ case tok_xdigit:
+ case tok_blank:
+ class_bit = BITw (now->tok);
+ class256_bit = BIT (now->tok);
+ handle_digits = 0;
+ read_charclass:
+ ctype->class_done |= class_bit;
+ last_token = tok_none;
+ ellipsis_token = tok_none;
+ now = lr_token (ldfile, charmap, NULL);
+ while (now->tok != tok_eol && now->tok != tok_eof)
+ {
+ uint32_t wch;
+ struct charseq *seq;
+
+ if (ellipsis_token == tok_none)
+ {
+ if (get_character (now, charmap, repertoire, &seq, &wch))
+ goto err_label;
+
+ if (!ignore_content && seq != NULL && seq->nbytes == 1)
+ /* Yep, we can store information about this byte
+ sequence. */
+ ctype->class256_collection[seq->bytes[0]] |= class256_bit;
+
+ if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
+ && class_bit != 0)
+ /* We have the UCS4 position. */
+ *find_idx (ctype, &ctype->class_collection,
+ &ctype->class_collection_max,
+ &ctype->class_collection_act, wch) |= class_bit;
+
+ last_token = now->tok;
+ last_str = now->val.str.startmb;
+ last_seq = seq;
+ last_wch = wch;
+ memcpy (last_charcode, now->val.charcode.bytes, 16);
+ last_charcode_len = now->val.charcode.nbytes;
+
+ if (!ignore_content && handle_digits == 1)
+ {
+ /* We must store the digit values. */
+ if (ctype->mbdigits_act == ctype->mbdigits_max)
+ {
+ ctype->mbdigits_max *= 2;
+ ctype->mbdigits = xrealloc (ctype->mbdigits,
+ (ctype->mbdigits_max
+ * sizeof (char *)));
+ ctype->wcdigits_max *= 2;
+ ctype->wcdigits = xrealloc (ctype->wcdigits,
+ (ctype->wcdigits_max
+ * sizeof (uint32_t)));
+ }
+
+ ctype->mbdigits[ctype->mbdigits_act++] = seq;
+ ctype->wcdigits[ctype->wcdigits_act++] = wch;
+ }
+ else if (!ignore_content && handle_digits == 2)
+ {
+ /* We must store the digit values. */
+ if (ctype->outdigits_act >= 10)
+ {
+ lr_error (ldfile, _("\
+%s: field `%s' does not contain exactly ten entries"),
+ "LC_CTYPE", "outdigit");
+ goto err_label;
+ }
+
+ ctype->mboutdigits[ctype->outdigits_act] = seq;
+ ctype->wcoutdigits[ctype->outdigits_act] = wch;
+ ++ctype->outdigits_act;
+ }
+ }
+ else
+ {
+ /* Now it gets complicated. We have to resolve the
+ ellipsis problem. First we must distinguish between
+ the different kinds of ellipsis and this must match the
+ tokens we have seen. */
+ assert (last_token != tok_none);
+
+ if (last_token != now->tok)
+ {
+ lr_error (ldfile, _("\
+ellipsis range must be marked by two operands of same type"));
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (last_token == tok_bsymbol)
+ {
+ if (ellipsis_token == tok_ellipsis3)
+ lr_error (ldfile, _("with symbolic name range values \
+the absolute ellipsis `...' must not be used"));
+
+ charclass_symbolic_ellipsis (ldfile, ctype, charmap,
+ repertoire, now, last_str,
+ class256_bit, class_bit,
+ (ellipsis_token
+ == tok_ellipsis4
+ ? 10 : 16),
+ ignore_content,
+ handle_digits);
+ }
+ else if (last_token == tok_ucs4)
+ {
+ if (ellipsis_token != tok_ellipsis2)
+ lr_error (ldfile, _("\
+with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
+
+ charclass_ucs4_ellipsis (ldfile, ctype, charmap,
+ repertoire, now, last_wch,
+ class256_bit, class_bit,
+ ignore_content, handle_digits);
+ }
+ else
+ {
+ assert (last_token == tok_charcode);
+
+ if (ellipsis_token != tok_ellipsis3)
+ lr_error (ldfile, _("\
+with character code range values one must use the absolute ellipsis `...'"));
+
+ charclass_charcode_ellipsis (ldfile, ctype, charmap,
+ repertoire, now,
+ last_charcode,
+ last_charcode_len,
+ class256_bit, class_bit,
+ ignore_content,
+ handle_digits);
+ }
+
+ /* Now we have used the last value. */
+ last_token = tok_none;
+ }
+
+ /* Next we expect a semicolon or the end of the line. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok == tok_eol || now->tok == tok_eof)
+ break;
+
+ if (last_token != tok_none
+ && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4)
+ {
+ ellipsis_token = now->tok;
+ now = lr_token (ldfile, charmap, NULL);
+ continue;
+ }
+
+ if (now->tok != tok_semicolon)
+ goto err_label;
+
+ /* And get the next character. */
+ now = lr_token (ldfile, charmap, NULL);
+
+ ellipsis_token = tok_none;
+ }
+ break;
+
+ case tok_digit:
+ handle_tok_digit:
+ class_bit = _ISwdigit;
+ class256_bit = _ISdigit;
+ handle_digits = 1;
+ goto read_charclass;
+
+ case tok_outdigit:
+ if (ctype->outdigits_act != 0)
+ lr_error (ldfile, _("\
+%s: field `%s' declared more than once"),
+ "LC_CTYPE", "outdigit");
+ class_bit = 0;
+ class256_bit = 0;
+ handle_digits = 2;
+ goto read_charclass;
+
+ case tok_toupper:
+ mapidx = 0;
+ goto read_mapping;
+
+ case tok_tolower:
+ mapidx = 1;
+ goto read_mapping;
+
+ case tok_map:
+ /* We simply forget the `map' keyword and use the following
+ operand to determine the mapping. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok == tok_ident || now->tok == tok_string)
+ {
+ size_t cnt;
- if (idx < 256)
- return table == NULL ? NULL : &(*table)[idx];
+ for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
+ if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
+ break;
- for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
- if (ctype->charnames[cnt] == idx)
- break;
+ if (cnt < ctype->map_collection_nr)
+ mapidx = cnt;
+ else
+ {
+ lr_error (ldfile, _("unknown map `%s'"),
+ now->val.str.startmb);
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+ }
+ else if (now->tok < tok_toupper || now->tok > tok_tolower)
+ goto err_label;
+ else
+ mapidx = now->tok - tok_toupper;
- /* We have to distinguish two cases: the name is found or not. */
- if (cnt == ctype->charnames_act)
- {
- /* Extend the name array. */
- if (ctype->charnames_act == ctype->charnames_max)
- {
- ctype->charnames_max *= 2;
- ctype->charnames = (unsigned int *)
- xrealloc (ctype->charnames,
- sizeof (unsigned int) * ctype->charnames_max);
- }
- ctype->charnames[ctype->charnames_act++] = idx;
- }
+ now = lr_token (ldfile, charmap, NULL);
+ /* This had better be a semicolon. */
+ if (now->tok != tok_semicolon)
+ goto err_label;
- if (table == NULL)
- /* We have done everything we are asked to do. */
- return NULL;
+ read_mapping:
+ /* Test whether this mapping was already defined. */
+ if (ctype->tomap_done[mapidx])
+ {
+ lr_error (ldfile, _("duplicated definition for mapping `%s'"),
+ ctype->mapnames[mapidx]);
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+ ctype->tomap_done[mapidx] = 1;
- if (cnt >= *act)
- {
- if (cnt >= *max)
- {
- size_t old_max = *max;
- do
- *max *= 2;
- while (*max <= cnt);
+ now = lr_token (ldfile, charmap, NULL);
+ while (now->tok != tok_eol && now->tok != tok_eof)
+ {
+ struct charseq *from_seq;
+ uint32_t from_wch;
+ struct charseq *to_seq;
+ uint32_t to_wch;
+
+ /* Every pair starts with an opening brace. */
+ if (now->tok != tok_open_brace)
+ goto err_label;
+
+ /* Next comes the from-value. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (get_character (now, charmap, repertoire, &from_seq,
+ &from_wch) != 0)
+ goto err_label;
+
+ /* The next is a comma. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok != tok_comma)
+ goto err_label;
+
+ /* And the other value. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (get_character (now, charmap, repertoire, &to_seq,
+ &to_wch) != 0)
+ goto err_label;
+
+ /* And the last thing is the closing brace. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok != tok_close_brace)
+ goto err_label;
+
+ if (!ignore_content)
+ {
+ if (mapidx < 2 && from_seq != NULL && to_seq != NULL
+ && from_seq->nbytes == 1 && to_seq->nbytes == 1)
+ /* We can use this value. */
+ ctype->map256_collection[mapidx][from_seq->bytes[0]]
+ = to_seq->bytes[0];
+
+ if (from_wch != ILLEGAL_CHAR_VALUE
+ && to_wch != ILLEGAL_CHAR_VALUE)
+ /* Both correct values. */
+ *find_idx (ctype, &ctype->map_collection[mapidx],
+ &ctype->map_collection_max[mapidx],
+ &ctype->map_collection_act[mapidx],
+ from_wch) = to_wch;
+ }
+
+ /* Now comes a semicolon or the end of the line/file. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok == tok_semicolon)
+ now = lr_token (ldfile, charmap, NULL);
+ }
+ break;
- *table =
- (u_int32_t *) xrealloc (*table, *max * sizeof (unsigned long int));
- memset (&(*table)[old_max], '\0',
- (*max - old_max) * sizeof (u_int32_t));
+ case tok_translit_start:
+ /* The rest of the line had better be empty. */
+ lr_ignore_rest (ldfile, 1);
+
+ /* We count here the number of allocated entries in the `translit'
+ array. */
+ cnt = 0;
+
+ /* We proceed until we see the `translit_end' token. */
+ while (now = lr_token (ldfile, charmap, repertoire),
+ now->tok != tok_translit_end && now->tok != tok_eof)
+ {
+ if (now->tok == tok_eol)
+ /* Ignore empty lines. */
+ continue;
+
+ if (now->tok == tok_translit_end)
+ {
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ if (now->tok == tok_include)
+ {
+ /* We have to include locale. */
+ const char *locale_name;
+ const char *repertoire_name;
+
+ now = lr_token (ldfile, charmap, NULL);
+ /* This should be a string or an identifier. In any
+ case something to name a locale. */
+ if (now->tok != tok_string && now->tok != tok_ident)
+ {
+ translit_syntax:
+ lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+ locale_name = now->val.str.startmb;
+
+ /* Next should be a semicolon. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok != tok_semicolon)
+ goto translit_syntax;
+
+ /* Now the repertoire name. */
+ now = lr_token (ldfile, charmap, NULL);
+ if ((now->tok != tok_string && now->tok != tok_ident)
+ || now->val.str.startmb == NULL)
+ goto translit_syntax;
+ repertoire_name = now->val.str.startmb;
+
+ /* We must not have more than one `include'. */
+ if (ctype->translit_copy_locale != NULL)
+ {
+ lr_error (ldfile, _("\
+%s: only one `include' instruction allowed"), "LC_CTYPE");
+ lr_ignore_rest (ldfile, 0);
+ continue;
+ }
+
+ ctype->translit_copy_locale = locale_name;
+ ctype->translit_copy_repertoire = repertoire_name;
+
+ /* The rest of the line must be empty. */
+ lr_ignore_rest (ldfile, 1);
+ continue;
+ }
+
+ read_translit_entry (ldfile, ctype, now, charmap, repertoire);
+ }
+ break;
+
+ case tok_ident:
+ /* This could mean one of several things. First test whether
+ it's a character class name. */
+ for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
+ if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
+ break;
+ if (cnt < ctype->nr_charclass)
+ {
+ class_bit = _ISwbit (cnt);
+ class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
+ free (now->val.str.startmb);
+ goto read_charclass;
+ }
+ if (strcmp (now->val.str.startmb, "special1") == 0)
+ {
+ class_bit = _ISwspecial1;
+ free (now->val.str.startmb);
+ goto read_charclass;
+ }
+ if (strcmp (now->val.str.startmb, "special2") == 0)
+ {
+ class_bit = _ISwspecial2;
+ free (now->val.str.startmb);
+ goto read_charclass;
+ }
+ if (strcmp (now->val.str.startmb, "special3") == 0)
+ {
+ class_bit = _ISwspecial3;
+ free (now->val.str.startmb);
+ goto read_charclass;
+ }
+ if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
+ {
+ mapidx = 2;
+ goto read_mapping;
+ }
+ break;
+
+ case tok_end:
+ /* Next we assume `LC_CTYPE'. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok == tok_eof)
+ break;
+ if (now->tok == tok_eol)
+ lr_error (ldfile, _("%s: incomplete `END' line"),
+ "LC_CTYPE");
+ else if (now->tok != tok_lc_ctype)
+ lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
+ lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
+ return;
+
+ default:
+ err_label:
+ if (now->tok != tok_eof)
+ SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
}
- (*table)[cnt] = 0;
- *act = cnt;
+ /* Prepare for the next round. */
+ now = lr_token (ldfile, charmap, NULL);
+ nowtok = now->tok;
}
- return &(*table)[cnt];
+ /* When we come here we reached the end of the file. */
+ lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
}
static void
-set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
+set_class_defaults (struct locale_ctype_t *ctype, struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
{
+ size_t cnt;
+
/* This function defines the default values for the classes and conversions
according to POSIX.2 2.5.2.1.
It may seem that the order of these if-blocks is arbitrary but it is NOT.
Don't move them unless you know what you do! */
- void set_default (int bit, int from, int to)
+ void set_default (int bitpos, int from, int to)
{
char tmp[2];
int ch;
+ int bit = _ISbit (bitpos);
+ int bitw = _ISwbit (bitpos);
/* Define string. */
strcpy (tmp, "?");
for (ch = from; ch <= to; ++ch)
{
- unsigned int value;
+ uint32_t value;
+ struct charseq *seq;
tmp[0] = ch;
- value = charset_find_value (&charset->char_table, tmp, 1);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ value = repertoire_find_value (repertoire, tmp, 1);
+ if (value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- tmp);
- continue;
+%s: character `%s' not defined in repertoire while needed as default value"),
+ "LC_CTYPE", tmp);
}
else
- ELEM (ctype, class_collection, , value) |= bit;
+ ELEM (ctype, class_collection, , value) |= bitw;
+
+ seq = charmap_find_value (charmap, tmp, 1);
+ if (seq == NULL)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined in charmap while needed as default value"),
+ "LC_CTYPE", tmp);
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", tmp);
+ else
+ ctype->class256_collection[seq->bytes[0]] |= bit;
}
}
/* Set default values if keyword was not present. */
- if ((ctype->class_done & BIT (tok_upper)) == 0)
+ if ((ctype->class_done & BITw (tok_upper)) == 0)
/* "If this keyword [upper] is not specified, the uppercase letters
`A' through `Z', ..., shall automatically belong to this class,
with implementation defined character values." [P1003.2, 2.5.2.1] */
- set_default (BIT (tok_upper), 'A', 'Z');
+ set_default (BITPOS (tok_upper), 'A', 'Z');
- if ((ctype->class_done & BIT (tok_lower)) == 0)
+ if ((ctype->class_done & BITw (tok_lower)) == 0)
/* "If this keyword [lower] is not specified, the lowercase letters
`a' through `z', ..., shall automatically belong to this class,
with implementation defined character values." [P1003.2, 2.5.2.1] */
- set_default (BIT (tok_lower), 'a', 'z');
+ set_default (BITPOS (tok_lower), 'a', 'z');
- if ((ctype->class_done & BIT (tok_alpha)) == 0)
+ if ((ctype->class_done & BITw (tok_alpha)) == 0)
{
/* Table 2-6 in P1003.2 says that characters in class `upper' or
class `lower' *must* be in class `alpha'. */
unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
- size_t cnt;
for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
if ((ctype->class_collection[cnt] & mask) != 0)
ctype->class_collection[cnt] |= BIT (tok_alpha);
}
- if ((ctype->class_done & BIT (tok_digit)) == 0)
+ if ((ctype->class_done & BITw (tok_digit)) == 0)
/* "If this keyword [digit] is not specified, the digits `0' through
`9', ..., shall automatically belong to this class, with
implementation-defined character values." [P1003.2, 2.5.2.1] */
- set_default (BIT (tok_digit), '0', '9');
+ set_default (BITPOS (tok_digit), '0', '9');
/* "Only characters specified for the `alpha' and `digit' keyword
shall be specified. Characters specified for the keyword `alpha'
and `digit' are automatically included in this class. */
{
unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
- size_t cnt;
for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
if ((ctype->class_collection[cnt] & mask) != 0)
ctype->class_collection[cnt] |= BIT (tok_alnum);
}
- if ((ctype->class_done & BIT (tok_space)) == 0)
+ if ((ctype->class_done & BITw (tok_space)) == 0)
/* "If this keyword [space] is not specified, the characters <space>,
<form-feed>, <newline>, <carriage-return>, <tab>, and
<vertical-tab>, ..., shall automatically belong to this class,
with implementation-defined character values." [P1003.2, 2.5.2.1] */
{
- unsigned int value;
+ uint32_t value;
+ struct charseq *seq;
- value = charset_find_value (&charset->char_table, "space", 5);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ value = repertoire_find_value (repertoire, "space", 5);
+ if (value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<space>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<space>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (&charset->char_table, "form-feed", 9);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ seq = charmap_find_value (charmap, "space", 5);
+ if (seq == NULL)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<form-feed>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<space>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<space>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
+
+
+ value = repertoire_find_value (repertoire, "form-feed", 9);
+ if (value == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<form-feed>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (&charset->char_table, "newline", 7);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ seq = charmap_find_value (charmap, "form-feed", 9);
+ if (seq == NULL)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<newline>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<form-feed>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<form-feed>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
+
+
+ value = repertoire_find_value (repertoire, "newline", 7);
+ if (value == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<newline>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (&charset->char_table, "carriage-return", 15);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ seq = charmap_find_value (charmap, "newline", 7);
+ if (seq == NULL)
{
if (!be_quiet)
error (0, 0, _("\
character `%s' not defined while needed as default value"),
- "<carriage-return>");
+ "<newline>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<newline>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
+
+
+ value = repertoire_find_value (repertoire, "carriage-return", 15);
+ if (value == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<carriage-return>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (&charset->char_table, "tab", 3);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ seq = charmap_find_value (charmap, "carriage-return", 15);
+ if (seq == NULL)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<tab>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<carriage-return>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<carriage-return>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
+
+
+ value = repertoire_find_value (repertoire, "tab", 3);
+ if (value == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<tab>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (&charset->char_table, "vertical-tab", 12);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ seq = charmap_find_value (charmap, "tab", 3);
+ if (seq == NULL)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<vertical-tab>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<tab>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<tab>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
+
+
+ value = repertoire_find_value (repertoire, "vertical-tab", 12);
+ if (value == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<vertical-tab>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
+
+ seq = charmap_find_value (charmap, "vertical-tab", 12);
+ if (seq == NULL)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<vertical-tab>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<vertical-tab>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
}
- if ((ctype->class_done & BIT (tok_xdigit)) == 0)
+ if ((ctype->class_done & BITw (tok_xdigit)) == 0)
/* "If this keyword is not specified, the digits `0' to `9', the
uppercase letters `A' through `F', and the lowercase letters `a'
through `f', ..., shall automatically belong to this class, with
implementation defined character values." [P1003.2, 2.5.2.1] */
{
- set_default (BIT (tok_xdigit), '0', '9');
- set_default (BIT (tok_xdigit), 'A', 'F');
- set_default (BIT (tok_xdigit), 'a', 'f');
+ set_default (BITPOS (tok_xdigit), '0', '9');
+ set_default (BITPOS (tok_xdigit), 'A', 'F');
+ set_default (BITPOS (tok_xdigit), 'a', 'f');
}
- if ((ctype->class_done & BIT (tok_blank)) == 0)
+ if ((ctype->class_done & BITw (tok_blank)) == 0)
/* "If this keyword [blank] is unspecified, the characters <space> and
<tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
{
- unsigned int value;
+ uint32_t value;
+ struct charseq *seq;
- value = charset_find_value (&charset->char_table, "space", 5);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ value = repertoire_find_value (repertoire, "space", 5);
+ if (value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<space>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<space>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
- value = charset_find_value (&charset->char_table, "tab", 3);
- if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
+ seq = charmap_find_value (charmap, "space", 5);
+ if (seq == NULL)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<tab>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<space>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<space>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
+
+
+ value = repertoire_find_value (repertoire, "tab", 3);
+ if (value == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<tab>");
}
else
ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
+
+ seq = charmap_find_value (charmap, "tab", 3);
+ if (seq == NULL)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<tab>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<tab>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
}
- if ((ctype->class_done & BIT (tok_graph)) == 0)
+ if ((ctype->class_done & BITw (tok_graph)) == 0)
/* "If this keyword [graph] is not specified, characters specified for
the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
shall belong to this character class." [P1003.2, 2.5.2.1] */
@@ -1142,9 +2603,13 @@ character `%s' not defined while needed as default value"),
for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
if ((ctype->class_collection[cnt] & mask) != 0)
ctype->class_collection[cnt] |= BIT (tok_graph);
+
+ for (cnt = 0; cnt < 256; ++cnt)
+ if ((ctype->class256_collection[cnt] & mask) != 0)
+ ctype->class256_collection[cnt] |= BIT (tok_graph);
}
- if ((ctype->class_done & BIT (tok_print)) == 0)
+ if ((ctype->class_done & BITw (tok_print)) == 0)
/* "If this keyword [print] is not provided, characters specified for
the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
and the <space> character shall belong to this character class."
@@ -1153,25 +2618,46 @@ character `%s' not defined while needed as default value"),
unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
size_t cnt;
- wchar_t space;
+ uint32_t space;
+ struct charseq *seq;
for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
if ((ctype->class_collection[cnt] & mask) != 0)
ctype->class_collection[cnt] |= BIT (tok_print);
- space = charset_find_value (&charset->char_table, "space", 5);
+ for (cnt = 0; cnt < 256; ++cnt)
+ if ((ctype->class256_collection[cnt] & mask) != 0)
+ ctype->class256_collection[cnt] |= BIT (tok_print);
+
+
+ space = repertoire_find_value (repertoire, "space", 5);
if (space == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- "<space>");
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<space>");
}
else
ELEM (ctype, class_collection, , space) |= BIT (tok_print);
+
+ seq = charmap_find_value (charmap, "space", 5);
+ if (seq == NULL)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", "<space>");
+ }
+ else if (seq->nbytes != 1)
+ error (0, 0, _("\
+%s: character `%s' in charmap not representable with one byte"),
+ "LC_CTYPE", "<space>");
+ else
+ ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
}
- if (ctype->toupper_done == 0)
+ if (ctype->tomap_done[0] == 0)
/* "If this keyword [toupper] is not specified, the lowercase letters
`a' through `z', and their corresponding uppercase letters `A' to
`Z', ..., shall automatically be included, with implementation-
@@ -1184,55 +2670,133 @@ character `%s' not defined while needed as default value"),
for (ch = 'a'; ch <= 'z'; ++ch)
{
- unsigned int value_from, value_to;
+ uint32_t value_from, value_to;
+ struct charseq *seq_from, *seq_to;
tmp[1] = (char) ch;
- value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
- if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
+ value_from = repertoire_find_value (repertoire, &tmp[1], 1);
+ if (value_from == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- tmp);
- continue;
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", tmp);
+ }
+ else
+ {
+ /* This conversion is implementation defined. */
+ tmp[1] = (char) (ch + ('A' - 'a'));
+ value_to = repertoire_find_value (repertoire, &tmp[1], 1);
+ if (value_to == ILLEGAL_CHAR_VALUE)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", tmp);
+ }
+ else
+ /* The index [0] is determined by the order of the
+ `ctype_map_newP' calls in `ctype_startup'. */
+ ELEM (ctype, map_collection, [0], value_from) = value_to;
}
- /* This conversion is implementation defined. */
- tmp[1] = (char) (ch + ('A' - 'a'));
- value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
- if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
+ seq_from = charmap_find_value (charmap, &tmp[1], 1);
+ if (seq_from == NULL)
{
if (!be_quiet)
error (0, 0, _("\
-character `%s' not defined while needed as default value"),
- tmp);
- continue;
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", tmp);
+ }
+ else if (seq_from->nbytes != 1)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' needed as default value not representable with one byte"),
+ "LC_CTYPE", tmp);
+ }
+ else
+ {
+ /* This conversion is implementation defined. */
+ tmp[1] = (char) (ch + ('A' - 'a'));
+ seq_to = charmap_find_value (charmap, &tmp[1], 1);
+ if (seq_to == NULL)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' not defined while needed as default value"),
+ "LC_CTYPE", tmp);
+ }
+ else if (seq_to->nbytes != 1)
+ {
+ if (!be_quiet)
+ error (0, 0, _("\
+%s: character `%s' needed as default value not representable with one byte"),
+ "LC_CTYPE", tmp);
+ }
+ else
+ /* The index [0] is determined by the order of the
+ `ctype_map_newP' calls in `ctype_startup'. */
+ ctype->map256_collection[0][seq_from->bytes[0]]
+ = seq_to->bytes[0];
}
-
- /* The index [0] is determined by the order of the
- `ctype_map_newP' calls in `ctype_startup'. */
- ELEM (ctype, map_collection, [0], value_from) = value_to;
}
}
- if (ctype->tolower_done == 0)
+ if (ctype->tomap_done[1] == 0)
/* "If this keyword [tolower] is not specified, the mapping shall be
the reverse mapping of the one specified to `toupper'." [P1003.2] */
{
- size_t cnt;
-
for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
if (ctype->map_collection[0][cnt] != 0)
ELEM (ctype, map_collection, [1],
ctype->map_collection[0][cnt])
= ctype->charnames[cnt];
+
+ for (cnt = 0; cnt < 256; ++cnt)
+ if (ctype->map256_collection[0][cnt] != 0)
+ ctype->map_collection[1][ctype->map_collection[0][cnt]]
+ = ctype->charnames[cnt];
+ }
+
+ if (ctype->outdigits_act == 0)
+ {
+ for (cnt = 0; cnt < 10; ++cnt)
+ {
+ ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+ digits + cnt, 1);
+
+ if (ctype->mboutdigits[cnt] == NULL)
+ {
+ ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+ longnames[cnt],
+ strlen (longnames[cnt]));
+
+ if (ctype->mboutdigits[cnt] == NULL)
+ {
+ /* Provide a replacement. */
+ error (0, 0, _("\
+no output digits defined and none of the standard names in the charmap"));
+
+ ctype->mboutdigits[cnt] = obstack_alloc (&charmap->mem_pool,
+ sizeof (struct charseq) + 1);
+
+ /* This is better than nothing. */
+ ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
+ ctype->mboutdigits[cnt]->nbytes = 1;
+ }
+ }
+ }
+
+ ctype->outdigits_act = 10;
}
}
static void
-allocate_arrays (struct locale_ctype_t *ctype, struct charset_t *charset)
+allocate_arrays (struct locale_ctype_t *ctype, struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
{
size_t idx;
@@ -1300,12 +2864,12 @@ Computing table size for character classes might take a while..."),
# define NAMES_B2 ctype->names_el
#endif
- ctype->names_eb = (u_int32_t *) xcalloc (ctype->plane_size
- * ctype->plane_cnt,
- sizeof (u_int32_t));
- ctype->names_el = (u_int32_t *) xcalloc (ctype->plane_size
- * ctype->plane_cnt,
- sizeof (u_int32_t));
+ ctype->names_eb = (uint32_t *) xcalloc (ctype->plane_size
+ * ctype->plane_cnt,
+ sizeof (uint32_t));
+ ctype->names_el = (uint32_t *) xcalloc (ctype->plane_size
+ * ctype->plane_cnt,
+ sizeof (uint32_t));
for (idx = 1; idx < 256; ++idx)
NAMES_B1[idx] = idx;
@@ -1330,7 +2894,7 @@ Computing table size for character classes might take a while..."),
NAMES_B1[0] = 0;
for (idx = 0; idx < ctype->plane_size * ctype->plane_cnt; ++idx)
- NAMES_B2[idx] = SWAPU32 (NAMES_B1[idx]);
+ NAMES_B2[idx] = bswap_32 (NAMES_B1[idx]);
/* You wonder about this amount of memory? This is only because some
@@ -1353,10 +2917,9 @@ Computing table size for character classes might take a while..."),
# define TRANS32(w) (w)
#endif
- for (idx = 0; idx < ctype->class_collection_act; ++idx)
- if (ctype->charnames[idx] < 256)
- ctype->ctype_b[128 + ctype->charnames[idx]]
- = TRANS (ctype->class_collection[idx]);
+ /* This is the array accessed using the multibyte string elements. */
+ for (idx = 0; idx < 256; ++idx)
+ ctype->ctype_b[128 + idx] = TRANS (ctype->class256_collection[idx]);
/* Mirror first 127 entries. We must take care that entry -1 is not
mirrored because EOF == -1. */
@@ -1369,10 +2932,10 @@ Computing table size for character classes might take a while..."),
= TRANS32 (ctype->class_collection[idx]);
/* Room for table of mappings. */
- ctype->map_eb = (u_int32_t **) xmalloc (ctype->map_collection_nr
- * sizeof (u_int32_t *));
- ctype->map_el = (u_int32_t **) xmalloc (ctype->map_collection_nr
- * sizeof (u_int32_t *));
+ ctype->map_eb = (uint32_t **) xmalloc (ctype->map_collection_nr
+ * sizeof (uint32_t *));
+ ctype->map_el = (uint32_t **) xmalloc (ctype->map_collection_nr
+ * sizeof (uint32_t *));
/* Fill in all mappings. */
for (idx = 0; idx < ctype->map_collection_nr; ++idx)
@@ -1380,12 +2943,12 @@ Computing table size for character classes might take a while..."),
unsigned int idx2;
/* Allocate table. */
- ctype->map_eb[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
- * ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
- ctype->map_el[idx] = (u_int32_t *) xmalloc ((ctype->plane_size
- * ctype->plane_cnt + 128)
- * sizeof (u_int32_t));
+ ctype->map_eb[idx] = (uint32_t *) xmalloc ((ctype->plane_size
+ * ctype->plane_cnt + 128)
+ * sizeof (uint32_t));
+ ctype->map_el[idx] = (uint32_t *) xmalloc ((ctype->plane_size
+ * ctype->plane_cnt + 128)
+ * sizeof (uint32_t));
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define MAP_B1 ctype->map_el
@@ -1397,13 +2960,11 @@ Computing table size for character classes might take a while..."),
/* Copy default value (identity mapping). */
memcpy (&MAP_B1[idx][128], NAMES_B1,
- ctype->plane_size * ctype->plane_cnt * sizeof (u_int32_t));
+ ctype->plane_size * ctype->plane_cnt * sizeof (uint32_t));
/* Copy values from collection. */
- for (idx2 = 0; idx2 < ctype->map_collection_act[idx]; ++idx2)
- if (ctype->map_collection[idx][idx2] != 0)
- MAP_B1[idx][128 + ctype->charnames[idx2]] =
- ctype->map_collection[idx][idx2];
+ for (idx2 = 0; idx2 < 256; ++idx2)
+ MAP_B1[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
/* Mirror first 127 entries. We must take care not to map entry
-1 because EOF == -1. */
@@ -1415,14 +2976,14 @@ Computing table size for character classes might take a while..."),
/* And now the other byte order. */
for (idx2 = 0; idx2 < ctype->plane_size * ctype->plane_cnt + 128; ++idx2)
- MAP_B2[idx][idx2] = SWAPU32 (MAP_B1[idx][idx2]);
+ MAP_B2[idx][idx2] = bswap_32 (MAP_B1[idx][idx2]);
}
/* Extra array for class and map names. */
- ctype->class_name_ptr = (u_int32_t *) xmalloc (ctype->nr_charclass
- * sizeof (u_int32_t));
- ctype->map_name_ptr = (u_int32_t *) xmalloc (ctype->map_collection_nr
- * sizeof (u_int32_t));
+ ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
+ * sizeof (uint32_t));
+ ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
+ * sizeof (uint32_t));
/* Array for width information. Because the expected width are very
small we use only one single byte. This save space and we need
@@ -1430,16 +2991,17 @@ Computing table size for character classes might take a while..."),
ctype->width = (unsigned char *) xmalloc (ctype->plane_size
* ctype->plane_cnt);
/* Initialize with default width value. */
- memset (ctype->width, charset->width_default,
+ memset (ctype->width, charmap->width_default,
ctype->plane_size * ctype->plane_cnt);
- if (charset->width_rules != NULL)
+ if (charmap->width_rules != NULL)
{
+#if 0
size_t cnt;
- for (cnt = 0; cnt < charset->nwidth_rules; ++cnt)
- if (charset->width_rules[cnt].width != charset->width_default)
- for (idx = charset->width_rules[cnt].from;
- idx <= charset->width_rules[cnt].to; ++idx)
+ for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
+ if (charmap->width_rules[cnt].width != charmap->width_default)
+ for (idx = charmap->width_rules[cnt].from;
+ idx <= charmap->width_rules[cnt].to; ++idx)
{
size_t nr = idx % ctype->plane_size;
size_t depth = 0;
@@ -1449,15 +3011,229 @@ Computing table size for character classes might take a while..."),
assert (depth < ctype->plane_cnt);
ctype->width[nr + depth * ctype->plane_size]
- = charset->width_rules[cnt].width;
+ = charmap->width_rules[cnt].width;
}
+#else
+ abort ();
+#endif
}
- /* Compute MB_CUR_MAX. */
- ctype->mb_cur_max = charset->mb_cur_max;
+ /* Set MB_CUR_MAX. */
+ ctype->mb_cur_max = charmap->mb_cur_max;
/* We need the name of the currently used 8-bit character set to
make correct conversion between this 8-bit representation and the
ISO 10646 character set used internally for wide characters. */
- ctype->codeset_name = charset->code_set_name ? : "";
+ ctype->codeset_name = charmap->code_set_name;
+
+ /* Now determine the table for the transliteration information.
+
+ XXX It is not yet clear to me whether it is worth implementing a
+ complicated algorithm which uses a hash table to locate the entries.
+ For now I'll use a simple array which can be searched using binary
+ search. */
+ if (ctype->translit_copy_locale != NULL)
+ {
+ /* Fold in the transliteration information from the locale mentioned
+ in the `include' statement. */
+ struct locale_ctype_t *here = ctype;
+
+ do
+ {
+ struct localedef_t *other = find_locale (LC_CTYPE,
+ here->translit_copy_locale,
+ repertoire->name, charmap);
+
+ if (other == NULL)
+ {
+ error (0, 0, _("\
+%s: transliteration data from locale `%s' not available"),
+ "LC_CTYPE", here->translit_copy_locale);
+ break;
+ }
+
+ here = other->categories[LC_CTYPE].ctype;
+
+ /* Enqueue the information if necessary. */
+ if (here->translit != NULL)
+ {
+ struct translit_t *endp = here->translit;
+ while (endp->next != NULL)
+ endp = endp->next;
+
+ endp->next = ctype->translit;
+ ctype->translit = here->translit;
+ }
+ }
+ while (here->translit_copy_locale != NULL);
+ }
+
+ if (ctype->translit != NULL)
+ {
+ /* First count how many entries we have. This is the upper limit
+ since some entries from the included files might be overwritten. */
+ size_t number = 0;
+ size_t cnt;
+ struct translit_t *runp = ctype->translit;
+ struct translit_t **sorted;
+ size_t from_len, to_len;
+
+ while (runp != NULL)
+ {
+ ++number;
+ runp = runp->next;
+ }
+
+ /* Next we allocate an array large enough and fill in the values. */
+ sorted = alloca (number * sizeof (struct translit_t **));
+ runp = ctype->translit;
+ number = 0;
+ do
+ {
+ /* Search for the place where to insert this string.
+ XXX Better use a real sorting algorithm later. */
+ size_t idx = 0;
+ int replace = 0;
+
+ while (idx < number)
+ {
+ int res = wcscmp ((const wchar_t *) sorted[idx]->from,
+ (const wchar_t *) runp->from);
+ if (res == 0)
+ {
+ replace = 1;
+ break;
+ }
+ if (res > 0)
+ break;
+ ++idx;
+ }
+
+ if (replace)
+ sorted[idx] = runp;
+ else
+ {
+ memmove (&sorted[idx + 1], &sorted[idx],
+ (number - idx) * sizeof (struct translit_t *));
+ sorted[idx] = runp;
+ ++number;
+ }
+
+ runp = runp->next;
+ }
+ while (runp != NULL);
+
+ /* The next step is putting all the possible transliteration
+ strings in one memory block so that we can write it out.
+ We need several different blocks:
+ - index to the from-string array
+ - from-string array
+ - index to the to-string array
+ - to-string array.
+ And this all must be available for both endianness variants.
+ */
+ from_len = to_len = 0;
+ for (cnt = 0; cnt < number; ++cnt)
+ {
+ struct translit_to_t *srunp;
+ from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
+ srunp = sorted[cnt]->to;
+ while (srunp != NULL)
+ {
+ to_len += wcslen ((const wchar_t *) srunp->str) + 1;
+ srunp = srunp->next;
+ }
+ /* Plus one for the extra NUL character marking the end of
+ the list for the current entry. */
+ ++to_len;
+ }
+
+ /* We can allocate the arrays for the results. */
+#if BYTE_ORDER == LITTLE_ENDIAN
+# define from_idx translit_from_idx_el
+# define from_tbl translit_from_tbl_el
+# define to_idx translit_to_idx_el
+# define to_tbl translit_to_tbl_el
+# define from_idx_ob translit_from_idx_eb
+# define from_tbl_ob translit_from_tbl_eb
+# define to_idx_ob translit_to_idx_eb
+# define to_tbl_ob translit_to_tbl_eb
+#else
+# define from_idx translit_from_idx_eb
+# define from_tbl translit_from_tbl_eb
+# define to_idx translit_to_idx_eb
+# define to_tbl translit_to_tbl_eb
+# define from_idx_ob translit_from_idx_el
+# define from_tbl_ob translit_from_tbl_el
+# define to_idx_ob translit_to_idx_el
+# define to_tbl_ob translit_to_tbl_el
+#endif
+ ctype->from_idx = xmalloc (number * sizeof (uint32_t));
+ ctype->from_idx_ob = xmalloc (number * sizeof (uint32_t));
+ ctype->from_tbl = xmalloc (from_len * sizeof (uint32_t));
+ ctype->from_tbl_ob = xmalloc (from_len * sizeof (uint32_t));
+ ctype->to_idx = xmalloc (number * sizeof (uint32_t));
+ ctype->to_idx_ob = xmalloc (number * sizeof (uint32_t));
+ ctype->to_tbl = xmalloc (to_len * sizeof (uint32_t));
+ ctype->to_tbl_ob = xmalloc (to_len * sizeof (uint32_t));
+
+ from_len = 0;
+ to_len = 0;
+ for (cnt = 0; cnt < number; ++cnt)
+ {
+ size_t len;
+ struct translit_to_t *srunp;
+
+ ctype->from_idx[cnt] = from_len;
+ ctype->to_idx[cnt] = to_len;
+
+ len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
+ wmemcpy ((wchar_t *) &ctype->from_tbl[from_len],
+ (const wchar_t *) sorted[cnt]->from, len);
+ from_len += len;
+
+ ctype->to_idx[cnt] = to_len;
+ srunp = sorted[cnt]->to;
+ while (srunp != NULL)
+ {
+ len = wcslen ((const wchar_t *) srunp->str) + 1;
+ wmemcpy ((wchar_t *) &ctype->to_tbl[to_len],
+ (const wchar_t *) srunp->str, len);
+ to_len += len;
+ srunp = srunp->next;
+ }
+ ctype->to_tbl[to_len++] = L'\0';
+ }
+
+ /* Now create the tables for the other endianness. */
+ for (cnt = 0; cnt < number; ++cnt)
+ {
+ ctype->from_idx_ob[cnt] = bswap_32 (ctype->from_idx[cnt]);
+ ctype->to_idx_ob[cnt] = bswap_32 (ctype->to_idx[cnt]);
+ }
+ for (cnt = 0; cnt < from_len; ++cnt)
+ ctype->from_tbl[cnt] = bswap_32 (ctype->from_tbl_ob[cnt]);
+ for (cnt = 0; cnt < to_len; ++cnt)
+ ctype->to_tbl[cnt] = bswap_32 (ctype->to_tbl_ob[cnt]);
+
+ /* Store the information about the length. */
+ ctype->translit_idx_size = number * sizeof (uint32_t);
+ ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
+ ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
+ }
+ else
+ {
+ /* Provide some dummy pointers since we have nothing to write out. */
+ static uint32_t no_str = { 0 };
+
+ ctype->translit_from_idx_el = &no_str;
+ ctype->translit_from_idx_eb = &no_str;
+ ctype->translit_from_tbl_el = &no_str;
+ ctype->translit_from_tbl_eb = &no_str;
+ ctype->translit_to_tbl_el = &no_str;
+ ctype->translit_to_tbl_eb = &no_str;
+ ctype->translit_idx_size = 0;
+ ctype->translit_from_tbl_size = 0;
+ ctype->translit_to_tbl_size = 0;
+ }
}