summaryrefslogtreecommitdiff
path: root/locale/programs/ld-collate.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-08-31 07:04:41 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-08-31 07:04:41 +0000
commit 4b10dd6c1959577f57850ca427a94fe22b9f3299 (patch)
tree b385d9b27e5a40d5baf7cd7e27c7cc5ef7129b5b /locale/programs/ld-collate.c
parent 1d1740d6b12894ed6a430e2e98bf73c5243b2925 (diff)
downloadglibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar
glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar.gz
glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.tar.bz2
glibc-4b10dd6c1959577f57850ca427a94fe22b9f3299.zip
Update.
* locale/Makefile (distribute): Add iso-639.def and iso-3166.def. Change charset.h to charmap.h. (categories): Add new categories. Leave out collate for now. Update build rules. * locale/categories.def: Add definitions for new categories. * locale/langinfo.h: Likewise. * locale/locale.h: Likewise. * locale/C-address.c: New file. * locale/C-identification.c: New file. * locale/C-measurement.c: New file. * locale/C-name.c: New file. * locale/C-paper.c: New file. * locale/C-telephone.c: New file. * locale/lc-address.c: Likewise. * locale/lc-identification.c: Likewise. * locale/lc-measurement.c: Likewise. * locale/lc-name.c: Likewise. * locale/lc-paper.c: Likewise. * locale/lc-telephone.c: Likewise. * locale/C-ctype.c: Update for locale rewrite. * locale/C-messages.c: Likewise. * locale/C-monetary.c: Likewise. * locale/C-time.c: Likewise. * locale/lc-collate.c: Likewise. * locale/lc-ctype.c: Likewise. * locale/lc-monetary.c: Likewise. * locale/lc-time.c: Likewise. * locale/localeinfo.h: Likewise. * locale/newlocale.c: Likewise. * locale/setlocale.c: Likewise. * locale/weight.h: Likewise. * locale/findlocale.c: Unconditionally use mmap. Handle new categories. * locale/loadlocale.c: Likewise. * locale/iso-3166.def: New file. * locale/iso-639.def: New file. * locale/programs/charmap-kw.gperf: Add new keywords. * locale/programs/locfile-kw.gperf: Likewise. * locale/programs/locfile-token.h: Define new tokens. * locale/programs/charmap.c: Rewrite to handle multibyte charsets. * locale/programs/charmap.h: New file. * locale/programs/charset.h: Removed. * locale/programs/config.h: Add __LC_LAST. * locale/programs/lc-address.c: New file. * locale/programs/lc-identification.c: New file. * locale/programs/lc-measurement.c: New file. * locale/programs/lc-name.c: New file. * locale/programs/lc-paper.c: New file. * locale/programs/lc-telephone.c: New file. * locale/programs/lc-collate.c: Update for locale rewrite. * locale/programs/lc-ctype.c: Likewise. 
* locale/programs/lc-messages.c: Likewise. * locale/programs/lc-monetary.c: Likewise. * locale/programs/lc-numeric.c: Likewise. * locale/programs/lc-time.c: Likewise. * locale/programs/locale.c: Likewise. * locale/programs/localedef.c: Likewise. * locale/programs/locfile.c: Likewise. * locale/programs/repertoire.c: Likewise. * locale/programs/repertoire.h: Likewise. * locale/programs/locfile.c: Update prototypes. Update handle_copy definition. * locale/programs/linereader.c: Add handling of wide char strings and new definition file syntax. * locale/programs/linereader.h (struct token): Add elements for wide character strings. * locale/programs/locale-spec.c: Disable handling of collation elements for now. * locale/programs/simple-hash.h: Cleanup. * locale/programs/stringtrans.h: Handle quite of end of line. * string/strcoll.c: Fall back on strcmp for now. * string/strxfrm.c: Fall back on strncpy/strlen for now. * time/strftime.c: Use new wide character data for wcsftime. * time/strptime.c: Remove _nl_C_LC_TIME declaration. * wctype/cname-lookup.h: Update for new LC_CTYPE data.
Diffstat (limited to 'locale/programs/ld-collate.c')
-rw-r--r-- locale/programs/ld-collate.c 1819
1 files changed, 1545 insertions, 274 deletions
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 265bfd0af1..3c1267420c 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -21,32 +21,1034 @@
# include <config.h>
#endif
-#include <endian.h>
-#include <errno.h>
-#include <limits.h>
-#include <locale.h>
-#include <obstack.h>
+#include <error.h>
#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-#include <libintl.h>
+#include "charmap.h"
#include "localeinfo.h"
-#include "locales.h"
-#include "simple-hash.h"
-#include "stringtrans.h"
-#include "strlen-hash.h"
+#include "linereader.h"
+#include "locfile.h"
+#include "localedef.h"
/* Uncomment the following line in the production version. */
/* #define NDEBUG 1 */
#include <assert.h>
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+/* Forward declaration. */
+struct element_t;
+
+/* Data type for a singly linked list of sections. Each node names one
+ section and records the first and last element of the order list that
+ belong to it, together with the sorting rules read for it. */
+struct section_list
+{
+ /* Next section in the list, or NULL at the end. */
+ struct section_list *next;
+ /* Name of the section. */
+ const char *name;
+ /* First element of this section. NULL as long as no order has been
+ defined for the section. */
+ struct element_t *first;
+ /* Last element of this section. */
+ struct element_t *last;
+ /* These are the rules for this section, one entry per sorting level. */
+ enum coll_sort_rule *rules;
+};
+
+/* Data type for collating element. Elements are chained into a doubly
+ linked order list through the `last' and `next' members. */
+struct element_t
+{
+ /* Multibyte and wide character form of the element, as handed to
+ new_element. */
+ const char *mbs;
+ const uint32_t *wcs;
+ int order;
+
+ /* Array with one entry per sorting rule, allocated when the element is
+ inserted in the order list. A null pointer entry means the weight
+ for that level is ignored. */
+ struct element_t **weights;
+
+ /* Where does the definition come from. */
+ const char *file;
+ size_t line;
+
+ /* Which section does this belong to. */
+ struct section_list *section;
+
+ /* Predecessor and successor in the order list. */
+ struct element_t *last;
+ struct element_t *next;
+};
+
+/* Data type for collating symbol. A symbol has no character value of
+ its own; it only refers to a position in the order list. */
+struct symbol_t
+{
+ /* Point to place in the order list. NULL for a freshly created
+ symbol. */
+ struct element_t *order;
+
+ /* Where does the definition come from. */
+ const char *file;
+ size_t line;
+};
+
+
+/* The real definition of the struct for the LC_COLLATE locale. It holds
+ everything collected while one LC_COLLATE category description is
+ read. */
+struct locale_collate_t
+{
+ /* Value of the `col_weight_max' keyword; -1 as long as none was
+ seen. */
+ int col_weight_max;
+ int cur_weight_max;
+
+ /* List of known sections (scripts). */
+ struct section_list *sections;
+ /* Section the order definitions currently being read belong to. */
+ struct section_list *current_section;
+ /* There always can be an unnamed section. */
+ struct section_list unnamed_section;
+ /* To make handling of errors easier we have another section. It is
+ selected when an `order_start' line names an unknown section. */
+ struct section_list error_section;
+
+ /* Number of sorting rules given in order_start line. */
+ uint32_t nrules;
+
+ /* Start of the order list. */
+ struct element_t *start;
+
+ /* The undefined element. */
+ struct element_t undefined;
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
+ /* This is the cursor for `reorder_after' insertions. New elements are
+ linked in behind it and it then moves on to the new element. */
+ struct element_t *cursor;
-#define SWAPU32(w) \
- (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
+ /* Remember whether last weight was an ellipsis. */
+ int was_ellipsis;
+
+ /* Known collating elements. Maps the name to a `struct element_t'. */
+ hash_table elem_table;
+
+ /* Known collating symbols. Maps the name to a `struct symbol_t'. */
+ hash_table sym_table;
+
+ /* Known collation sequences. */
+ hash_table seq_table;
+
+ /* Memory pool the sections, elements, symbols and weight arrays are
+ allocated from. */
+ struct obstack mempool;
+
+ /* The LC_COLLATE category is a bit special as it is sometimes possible
+ that the definitions from more than one input file contains information.
+ Therefore we keep all relevant input in a list. */
+ struct locale_collate_t *next;
+};
+
+
+/* We have a few global variables which are used for reading all
+ LC_COLLATE category descriptions in all files. */
+/* Number of sorting rules. It is zero until the first direction
+ specification has been read; every later specification is checked
+ against the value recorded then. */
+static int nrules;
+
+
+/* Allocate a new section list node from the memory pool of COLLATE.
+   The node is named STRING (the pointer is stored, not copied) and is
+   put in front of NEXT.  The new section has no elements and no
+   sorting rules yet.  Returns the new node.  */
+static struct section_list *
+make_seclist_elem (struct locale_collate_t *collate, const char *string,
+                   struct section_list *next)
+{
+  struct section_list *newp;
+
+  newp = (struct section_list *) obstack_alloc (&collate->mempool,
+                                                sizeof (*newp));
+  newp->next = next;
+  newp->name = string;
+  newp->first = NULL;
+  /* Memory from the obstack is not cleared and `last' is compared
+     against the cursor when elements are inserted, so every member
+     must get a defined value here.  */
+  newp->last = NULL;
+  newp->rules = NULL;
+
+  return newp;
+}
+
+
+/* Allocate a new collating element from the memory pool of COLLATE.
+   MBS and WCS are the multibyte and wide character representation;
+   the pointers are stored, not copied.  The element is not yet part of
+   the order list and has no weights.  Returns the new element.  */
+static struct element_t *
+new_element (struct locale_collate_t *collate, const char *mbs,
+             const uint32_t *wcs)
+{
+  struct element_t *newp;
+
+  newp = (struct element_t *) obstack_alloc (&collate->mempool,
+                                             sizeof (*newp));
+  newp->mbs = mbs;
+  newp->wcs = wcs;
+  newp->order = 0;
+
+  /* Memory from the obstack is not cleared.  Give the weight array a
+     defined value as well so that an element which never got an order
+     does not carry a wild pointer.  */
+  newp->weights = NULL;
+
+  newp->file = NULL;
+  newp->line = 0;
+
+  newp->section = NULL;
+
+  newp->last = NULL;
+  newp->next = NULL;
+
+  return newp;
+}
+
+
+/* Create a collating symbol in the memory pool of COLLATE.  The symbol
+   does not yet refer to a place in the order list and has no recorded
+   definition location.  */
+static struct symbol_t *
+new_symbol (struct locale_collate_t *collate)
+{
+  struct symbol_t *sym
+    = (struct symbol_t *) obstack_alloc (&collate->mempool,
+                                         sizeof (struct symbol_t));
+
+  /* No definition location known so far.  */
+  sym->line = 0;
+  sym->file = NULL;
+
+  /* Not yet placed in the order list.  */
+  sym->order = NULL;
+
+  return sym;
+}
+
+
+/* Test whether this name is already defined somewhere.  SYMBOL with
+   length SYMBOL_LEN is looked up in the charmap, the repertoire (if
+   there is one), and the tables of collating symbols and collating
+   elements of COLLATE.  For the first table containing the name an
+   error is reported through LDFILE and 1 is returned.  Returns 0 if
+   the name is not in use.  */
+static int
+check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
+                 struct charmap_t *charmap, struct repertoire_t *repertoire,
+                 const char *symbol, size_t symbol_len)
+{
+  void *ignore = NULL;
+
+  if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
+    {
+      lr_error (ldfile, _("`%s' already defined in charmap"), symbol);
+      return 1;
+    }
+
+  /* The repertoire is optional: collate_read passes a null pointer when
+     no repertoire name was given.  */
+  if (repertoire != NULL
+      && find_entry (&repertoire->char_table, symbol, symbol_len,
+                     &ignore) == 0)
+    {
+      lr_error (ldfile, _("`%s' already defined in repertoire"), symbol);
+      return 1;
+    }
+
+  if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
+    {
+      lr_error (ldfile, _("`%s' already defined as collating symbol"), symbol);
+      return 1;
+    }
+
+  if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
+    {
+      lr_error (ldfile, _("`%s' already defined as collating element"),
+                symbol);
+      return 1;
+    }
+
+  return 0;
+}
+
+
+/* Read the direction specification of an `order_start' line.  ARG is
+   the first token of the specification; further tokens are read from
+   LDFILE.  Keywords for one level are separated by commas, levels are
+   separated by semicolons.  The first specification read determines
+   the number of rules (the file-level `nrules'); later ones must have
+   the same number, missing levels are filled with `forward'.  The
+   resulting array is stored in the current section of COLLATE.  */
+static void
+read_directions (struct linereader *ldfile, struct token *arg,
+                 struct charmap_t *charmap, struct repertoire_t *repertoire,
+                 struct locale_collate_t *collate)
+{
+  int cnt = 0;
+  int max = nrules ?: 10;
+  /* Use the checking allocator like for the later resizing.  */
+  enum coll_sort_rule *rules
+    = (enum coll_sort_rule *) xcalloc (max, sizeof (*rules));
+  int warned = 0;
+
+  while (1)
+    {
+      int valid = 0;
+
+      if (arg->tok == tok_forward)
+        {
+          if (rules[cnt] & sort_backward)
+            {
+              if (! warned)
+                {
+                  lr_error (ldfile, _("\
+%s: `forward' and `backward' are mutually excluding each other"),
+                            "LC_COLLATE");
+                  warned = 1;
+                }
+            }
+          else if (rules[cnt] & sort_forward)
+            {
+              if (! warned)
+                {
+                  lr_error (ldfile, _("\
+%s: `%s' mentioned twice in definition of weight %d"),
+                            "LC_COLLATE", "forward", cnt + 1);
+                }
+            }
+          else
+            rules[cnt] |= sort_forward;
+
+          valid = 1;
+        }
+      else if (arg->tok == tok_backward)
+        {
+          if (rules[cnt] & sort_forward)
+            {
+              if (! warned)
+                {
+                  lr_error (ldfile, _("\
+%s: `forward' and `backward' are mutually excluding each other"),
+                            "LC_COLLATE");
+                  warned = 1;
+                }
+            }
+          else if (rules[cnt] & sort_backward)
+            {
+              if (! warned)
+                {
+                  lr_error (ldfile, _("\
+%s: `%s' mentioned twice in definition of weight %d"),
+                            "LC_COLLATE", "backward", cnt + 1);
+                }
+            }
+          else
+            rules[cnt] |= sort_backward;
+
+          valid = 1;
+        }
+      else if (arg->tok == tok_position)
+        {
+          if (rules[cnt] & sort_position)
+            {
+              if (! warned)
+                {
+                  /* The format must consume exactly the three arguments
+                     passed, like the messages for the other keywords.  */
+                  lr_error (ldfile, _("\
+%s: `%s' mentioned twice in definition of weight %d"),
+                            "LC_COLLATE", "position", cnt + 1);
+                }
+            }
+          else
+            rules[cnt] |= sort_position;
+
+          valid = 1;
+        }
+
+      /* A keyword has been consumed.  Fetch the token following it so
+         that the separator test below looks at the separator and not
+         once more at the keyword.  */
+      if (valid)
+        arg = lr_token (ldfile, charmap, repertoire);
+
+      if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
+          || arg->tok == tok_semicolon)
+        {
+          /* A separator which is not preceded by a keyword.  */
+          if (! valid && ! warned)
+            {
+              lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+              warned = 1;
+            }
+
+          /* See whether we have to increment the counter.  A comma
+             continues the current level.  */
+          if (arg->tok != tok_comma && rules[cnt] != 0)
+            ++cnt;
+
+          if (arg->tok == tok_eof || arg->tok == tok_eol)
+            /* End of line or file, so we exit the loop.  */
+            break;
+
+          if (nrules == 0)
+            {
+              /* See whether we have enough room in the array.  */
+              if (cnt == max)
+                {
+                  max += 10;
+                  rules = (enum coll_sort_rule *) xrealloc (rules,
+                                                            max
+                                                            * sizeof (*rules));
+                  memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
+                }
+            }
+          else
+            {
+              if (cnt == nrules)
+                {
+                  /* There must not be any more rule.  */
+                  if (! warned)
+                    {
+                      lr_error (ldfile, _("\
+%s: too many rules; first entry only had %d"),
+                                "LC_COLLATE", nrules);
+                      warned = 1;
+                    }
+
+                  lr_ignore_rest (ldfile, 0);
+                  break;
+                }
+            }
+        }
+      else
+        {
+          /* Neither a keyword nor a separator at a place where one of
+             them is required.  */
+          if (! warned)
+            {
+              lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+              warned = 1;
+            }
+        }
+
+      arg = lr_token (ldfile, charmap, repertoire);
+    }
+
+  if (nrules == 0)
+    {
+      /* Now we know how many rules we have.  */
+      nrules = cnt;
+      rules = (enum coll_sort_rule *) xrealloc (rules,
+                                                nrules * sizeof (*rules));
+    }
+  else
+    {
+      if (cnt < nrules)
+        {
+          /* Not enough rules in this specification.  */
+          if (! warned)
+            lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
+
+          do
+            rules[cnt] = sort_forward;
+          while (++cnt < nrules);
+        }
+    }
+
+  collate->current_section->rules = rules;
+}
+
+
+/* Put the symbol named by the token ARG in the order list of COLLATE,
+   behind the current cursor, and read the weights which follow on the
+   same line of LDFILE.  Names which are neither characters nor known
+   collating symbols or elements are silently ignored.  Reading of the
+   individual weights is not implemented yet.  */
+static void
+insert_value (struct linereader *ldfile, struct token *arg,
+              struct charmap_t *charmap, struct repertoire_t *repertoire,
+              struct locale_collate_t *collate)
+{
+  /* First find out what kind of symbol this is.  */
+  struct charseq *seq;
+  uint32_t wc;
+  struct element_t *elem = NULL;
+  int weight_cnt;
+
+  /* First determine the wide character.  There must be such a value,
+     otherwise we ignore it (if it is no collation symbol or element).  */
+  wc = repertoire_find_value (repertoire, arg->val.str.startmb,
+                              arg->val.str.lenmb);
+
+  /* Try to find the character in the charmap.  The result is not used
+     so far.  */
+  seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
+
+  if (wc == ILLEGAL_CHAR_VALUE)
+    {
+      /* It's no character, so look through the collation elements and
+         symbol list.  */
+      void *result;
+
+      if (find_entry (&collate->sym_table, arg->val.str.startmb,
+                      arg->val.str.lenmb, &result) == 0)
+        {
+          /* It's a collation symbol.  */
+          struct symbol_t *sym = (struct symbol_t *) result;
+          elem = sym->order;
+        }
+      else if (find_entry (&collate->elem_table, arg->val.str.startmb,
+                           arg->val.str.lenmb, &result) != 0)
+        /* It's also no collation element.  Therefore ignore it.  */
+        return;
+      else
+        /* It's a collation element; the table stores the element
+           itself.  */
+        elem = (struct element_t *) result;
+    }
+
+  /* XXX elem must be defined.  Plain characters and symbols without a
+     place in the order list have no element yet.  Skip the line instead
+     of dereferencing a null pointer.  */
+  if (elem == NULL)
+    {
+      lr_ignore_rest (ldfile, 0);
+      return;
+    }
+
+  /* Test whether this element is not already in the list.  */
+  if (elem->next != NULL)
+    {
+      /* `%.*s' takes the precision first and then the string.  */
+      lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
+                (int) arg->val.str.lenmb, arg->val.str.startmb,
+                elem->file, elem->line);
+      return;
+    }
+
+  /* Initialize all the fields.  */
+  elem->file = ldfile->fname;
+  elem->line = ldfile->lineno;
+  elem->last = collate->cursor;
+  elem->next = collate->cursor ? collate->cursor->next : NULL;
+  elem->weights = (struct element_t **)
+    obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *));
+  memset (elem->weights, '\0', nrules * sizeof (struct element_t *));
+
+  if (collate->current_section->first == NULL)
+    collate->current_section->first = elem;
+  if (collate->current_section->last == collate->cursor)
+    collate->current_section->last = elem;
+
+  collate->cursor = elem;
+
+  /* Now read the rest of the line.  */
+  ldfile->return_widestr = 1;
+
+  /* The loops test the bound before storing so that nothing is written
+     to the (then empty) weight array if no rules are known.  */
+  weight_cnt = 0;
+  while (weight_cnt < nrules)
+    {
+      arg = lr_token (ldfile, charmap, repertoire);
+
+      if (arg->tok == tok_eof || arg->tok == tok_eol)
+        {
+          /* This means the rest of the line uses the current element
+             as the weight.  */
+          for (; weight_cnt < nrules; ++weight_cnt)
+            elem->weights[weight_cnt] = elem;
+
+          return;
+        }
+
+      if (arg->tok == tok_ignore)
+        {
+          /* The weight for this level has to be ignored.  We use the
+             null pointer to indicate this.  */
+        }
+      else if (arg->tok == tok_bsymbol)
+        {
+          /* XXX Look up the symbol and record it as the weight.  */
+        }
+
+      ++weight_cnt;
+    }
+
+  lr_ignore_rest (ldfile, weight_cnt == nrules);
+}
+
+
+/* Get LOCALE ready for reading an LC_COLLATE description from LDFILE.
+   Unless IGNORE_CONTENT is set a zeroed collate structure is attached
+   to LOCALE and its hash tables and memory pool are set up.  The
+   reader is switched to translating strings and to not returning wide
+   character strings in either case.  */
+static void
+collate_startup (struct linereader *ldfile, struct localedef_t *locale,
+                 int ignore_content)
+{
+  if (!ignore_content)
+    {
+      struct locale_collate_t *fresh
+        = (struct locale_collate_t *) xcalloc (1, sizeof (*fresh));
+
+      locale->categories[LC_COLLATE].collate = fresh;
+
+      /* Tables for elements, symbols and sequences, then the pool.  */
+      init_hash (&fresh->elem_table, 100);
+      init_hash (&fresh->sym_table, 100);
+      init_hash (&fresh->seq_table, 500);
+      obstack_init (&fresh->mempool);
+
+      /* No `col_weight_max' line seen yet.  */
+      fresh->col_weight_max = -1;
+    }
+
+  ldfile->translate_strings = 1;
+  ldfile->return_widestr = 0;
+}
+
+
+/* Finish the LC_COLLATE category of LOCALE after reading. This is an
+ empty placeholder for now; nothing is checked or computed. */
+void
+collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
+{
+}
+
+
+/* Write the LC_COLLATE category of LOCALE below OUTPUT_PATH. This is an
+ empty placeholder for now; no output is produced. */
+void
+collate_output (struct localedef_t *locale, struct charmap_t *charmap,
+ const char *output_path)
+{
+}
+
+
+/* Read the LC_COLLATE category description from LDFILE into RESULT.
+   CHARMAP is the character map in use, REPERTOIRE_NAME names the
+   repertoire to load (may be NULL).  With IGNORE_CONTENT set the input
+   is parsed but definitions are not recorded.
+
+   The variable `state' tracks the position in the description:
+     0  before the first `order_start',
+     1  between `order_start' and `order_end',
+     2  after `order_end' or after a `copy' statement,
+     3  between `reorder-after' and `reorder-end',
+     4  after `reorder-end'.
+   The function returns after the `END LC_COLLATE' line; reaching the
+   end of the file before that is reported as an error.  */
+void
+collate_read (struct linereader *ldfile, struct localedef_t *result,
+              struct charmap_t *charmap, const char *repertoire_name,
+              int ignore_content)
+{
+  struct repertoire_t *repertoire = NULL;
+  struct locale_collate_t *collate;
+  struct token *now;
+  struct token *arg;
+  enum token_t nowtok;
+  int state = 0;
+  int was_ellipsis = 0;
+
+  /* Get the repertoire we have to use.  */
+  if (repertoire_name != NULL)
+    repertoire = repertoire_read (repertoire_name);
+
+  /* The rest of the line containing `LC_COLLATE' must be free.  */
+  lr_ignore_rest (ldfile, 1);
+
+  do
+    {
+      now = lr_token (ldfile, charmap, NULL);
+      nowtok = now->tok;
+    }
+  while (nowtok == tok_eol);
+
+  if (nowtok == tok_copy)
+    {
+      state = 2;
+      now = lr_token (ldfile, charmap, NULL);
+      if (now->tok != tok_string)
+        goto err_label;
+      /* XXX Use the name */
+      lr_ignore_rest (ldfile, 1);
+
+      now = lr_token (ldfile, charmap, NULL);
+      nowtok = now->tok;
+    }
+
+  /* Prepare the data structures.  */
+  collate_startup (ldfile, result, ignore_content);
+  collate = result->categories[LC_COLLATE].collate;
+
+  while (1)
+    {
+      /* Of course we don't proceed beyond the end of file.  */
+      if (nowtok == tok_eof)
+        break;
+
+      /* Ignore empty lines.  */
+      if (nowtok == tok_eol)
+        {
+          now = lr_token (ldfile, charmap, NULL);
+          nowtok = now->tok;
+          continue;
+        }
+
+      switch (nowtok)
+        {
+        case tok_coll_weight_max:
+          if (state != 0)
+            goto err_label;
+
+          arg = lr_token (ldfile, charmap, NULL);
+          if (arg->tok != tok_number)
+            goto err_label;
+          if (collate->col_weight_max != -1)
+            lr_error (ldfile, _("%s: duplicate definition of `%s'"),
+                      "LC_COLLATE", "col_weight_max");
+          else
+            collate->col_weight_max = arg->val.num;
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_section_symbol:
+          if (state != 0)
+            goto err_label;
+
+          arg = lr_token (ldfile, charmap, repertoire);
+          if (arg->tok != tok_bsymbol)
+            goto err_label;
+          else if (!ignore_content)
+            {
+              /* Check whether this section is already known.  The loop
+                 has to step through the list, otherwise it never ends
+                 once a section with a different name exists.  */
+              struct section_list *known = collate->sections;
+              while (known != NULL
+                     && strcmp (known->name, arg->val.str.startmb) != 0)
+                known = known->next;
+
+              if (known != NULL)
+                {
+                  lr_error (ldfile,
+                            _("%s: duplicate declaration of section `%s'"),
+                            "LC_COLLATE", arg->val.str.startmb);
+                  free (arg->val.str.startmb);
+                }
+              else
+                collate->sections = make_seclist_elem (collate,
+                                                       arg->val.str.startmb,
+                                                       collate->sections);
+
+              lr_ignore_rest (ldfile, known == NULL);
+            }
+          else
+            {
+              free (arg->val.str.startmb);
+              lr_ignore_rest (ldfile, 1);
+            }
+          break;
+
+        case tok_collating_element:
+          if (state != 0)
+            goto err_label;
+
+          arg = lr_token (ldfile, charmap, repertoire);
+          if (arg->tok != tok_bsymbol)
+            goto err_label;
+          else
+            {
+              const char *symbol = arg->val.str.startmb;
+              size_t symbol_len = arg->val.str.lenmb;
+
+              /* Next the `from' keyword.  */
+              arg = lr_token (ldfile, charmap, repertoire);
+              if (arg->tok != tok_from)
+                {
+                  free ((char *) symbol);
+                  goto err_label;
+                }
+
+              ldfile->return_widestr = 1;
+
+              /* Finally the string with the replacement.  */
+              arg = lr_token (ldfile, charmap, repertoire);
+              ldfile->return_widestr = 0;
+              if (arg->tok != tok_string)
+                {
+                  /* Do not lose the name, just like above.  */
+                  free ((char *) symbol);
+                  goto err_label;
+                }
+
+              if (!ignore_content)
+                {
+                  if (symbol == NULL)
+                    lr_error (ldfile, _("\
+%s: unknown character in collating element name"),
+                              "LC_COLLATE");
+                  if (arg->val.str.startmb == NULL)
+                    lr_error (ldfile, _("\
+%s: unknown character in collating element definition"),
+                              "LC_COLLATE");
+                  if (arg->val.str.startwc == NULL)
+                    lr_error (ldfile, _("\
+%s: unknown wide character in collating element definition"),
+                              "LC_COLLATE");
+                  else if (arg->val.str.lenwc < 2)
+                    lr_error (ldfile, _("\
+%s: substitution string in collating element definition must have at least two characters"),
+                              "LC_COLLATE");
+
+                  if (symbol != NULL)
+                    {
+                      /* The name is already defined.  */
+                      if (check_duplicate (ldfile, collate, charmap,
+                                           repertoire, symbol, symbol_len))
+                        goto col_elem_free;
+
+                      if (insert_entry (&collate->elem_table,
+                                        symbol, symbol_len,
+                                        new_element (collate,
+                                                     arg->val.str.startmb,
+                                                     arg->val.str.startwc))
+                          < 0)
+                        lr_error (ldfile, _("\
+error while adding collating element"));
+                    }
+                  else
+                    goto col_elem_free;
+                }
+              else
+                {
+                col_elem_free:
+                  if (symbol != NULL)
+                    free ((char *) symbol);
+                  if (arg->val.str.startmb != NULL)
+                    free (arg->val.str.startmb);
+                  if (arg->val.str.startwc != NULL)
+                    free (arg->val.str.startwc);
+                }
+              lr_ignore_rest (ldfile, 1);
+            }
+          break;
+
+        case tok_collating_symbol:
+          if (state != 0)
+            goto err_label;
+
+          arg = lr_token (ldfile, charmap, repertoire);
+          if (arg->tok != tok_bsymbol)
+            goto err_label;
+          else
+            {
+              const char *symbol = arg->val.str.startmb;
+              size_t symbol_len = arg->val.str.lenmb;
+
+              if (!ignore_content)
+                {
+                  if (symbol == NULL)
+                    lr_error (ldfile, _("\
+%s: unknown character in collating symbol name"),
+                              "LC_COLLATE");
+                  else
+                    {
+                      /* The name is already defined.  */
+                      if (check_duplicate (ldfile, collate, charmap,
+                                           repertoire, symbol, symbol_len))
+                        goto col_sym_free;
+
+                      if (insert_entry (&collate->sym_table,
+                                        symbol, symbol_len,
+                                        new_symbol (collate)) < 0)
+                        lr_error (ldfile, _("\
+error while adding collating symbol"));
+                    }
+                }
+              else
+                {
+                col_sym_free:
+                  if (symbol != NULL)
+                    free ((char *) symbol);
+                }
+              lr_ignore_rest (ldfile, 1);
+            }
+          break;
+
+        case tok_symbol_equivalence:
+          if (state != 0)
+            goto err_label;
+
+          arg = lr_token (ldfile, charmap, repertoire);
+          if (arg->tok != tok_bsymbol)
+            goto err_label;
+          else
+            {
+              const char *newname = arg->val.str.startmb;
+              size_t newname_len = arg->val.str.lenmb;
+              const char *symname;
+              size_t symname_len;
+              struct symbol_t *symval;
+
+              arg = lr_token (ldfile, charmap, repertoire);
+              if (arg->tok != tok_bsymbol)
+                {
+                  if (newname != NULL)
+                    free ((char *) newname);
+                  goto err_label;
+                }
+
+              symname = arg->val.str.startmb;
+              symname_len = arg->val.str.lenmb;
+
+              if (!ignore_content)
+                {
+                  if (newname == NULL)
+                    {
+                      lr_error (ldfile, _("\
+%s: unknown character in equivalent definition name"),
+                                "LC_COLLATE");
+                      goto sym_equiv_free;
+                    }
+                  if (symname == NULL)
+                    {
+                      lr_error (ldfile, _("\
+%s: unknown character in equivalent definition value"),
+                                "LC_COLLATE");
+                      goto sym_equiv_free;
+                    }
+                  /* The new name must not be defined already.  The
+                     existing symbol is by definition a duplicate, so
+                     it is the new name which has to be checked.  */
+                  if (check_duplicate (ldfile, collate, charmap,
+                                       repertoire, newname, newname_len))
+                    goto sym_equiv_free;
+
+                  /* See whether the symbol name is already defined.  */
+                  if (find_entry (&collate->sym_table, symname, symname_len,
+                                  (void **) &symval) != 0)
+                    {
+                      lr_error (ldfile, _("\
+%s: unknown symbol `%s' in equivalent definition"),
+                                "LC_COLLATE", symname);
+                      /* Free both names of this statement.  */
+                      goto sym_equiv_free;
+                    }
+
+                  if (insert_entry (&collate->sym_table,
+                                    newname, newname_len, symval) < 0)
+                    {
+                      lr_error (ldfile, _("\
+error while adding equivalent collating symbol"));
+                      goto sym_equiv_free;
+                    }
+
+                  free ((char *) symname);
+                }
+              else
+                {
+                sym_equiv_free:
+                  if (newname != NULL)
+                    free ((char *) newname);
+                  if (symname != NULL)
+                    free ((char *) symname);
+                }
+              lr_ignore_rest (ldfile, 1);
+            }
+          break;
+
+        case tok_order_start:
+          if (state != 0 && state != 1)
+            goto err_label;
+          state = 1;
+
+          /* The 14652 draft does not specify whether all `order_start' lines
+             must contain the same number of sort-rules, but 14651 does.  So
+             we require this here as well.  */
+          arg = lr_token (ldfile, charmap, repertoire);
+          if (arg->tok == tok_bsymbol)
+            {
+              /* This better should be a section name.  */
+              struct section_list *sp = collate->sections;
+              while (sp != NULL
+                     && strcmp (sp->name, arg->val.str.startmb) != 0)
+                sp = sp->next;
+
+              if (sp == NULL)
+                {
+                  lr_error (ldfile, _("\
+%s: unknown section name `%s'"),
+                            "LC_COLLATE", arg->val.str.startmb);
+                  /* We use the error section.  */
+                  collate->current_section = &collate->error_section;
+                }
+              else
+                {
+                  /* Remember this section.  */
+                  collate->current_section = sp;
+
+                  /* One should not be allowed to open the same
+                     section twice.  */
+                  if (sp->first != NULL)
+                    lr_error (ldfile, _("\
+%s: multiple order definitions for section `%s'"),
+                              "LC_COLLATE", sp->name);
+
+                  /* Next should come the end of the line or a semicolon.  */
+                  arg = lr_token (ldfile, charmap, repertoire);
+                  if (arg->tok == tok_eol)
+                    {
+                      uint32_t cnt;
+
+                      /* This means we have exactly one rule: `forward'.  */
+                      if (collate->nrules > 1)
+                        lr_error (ldfile, _("\
+%s: invalid number of sorting rules"),
+                                  "LC_COLLATE");
+                      else
+                        collate->nrules = 1;
+                      sp->rules = obstack_alloc (&collate->mempool,
+                                                 (sizeof (enum coll_sort_rule)
+                                                  * collate->nrules));
+                      for (cnt = 0; cnt < collate->nrules; ++cnt)
+                        sp->rules[cnt] = sort_forward;
+
+                      /* Next line.  */
+                      break;
+                    }
+
+                  /* Get the next token.  */
+                  arg = lr_token (ldfile, charmap, repertoire);
+                }
+            }
+          else
+            {
+              /* There is no section symbol.  Therefore we use the unnamed
+                 section.  */
+              collate->current_section = &collate->unnamed_section;
+
+              if (collate->unnamed_section.first != NULL)
+                lr_error (ldfile, _("\
+%s: multiple order definitions for unnamed section"),
+                          "LC_COLLATE");
+            }
+
+          /* Now read the direction names.  */
+          read_directions (ldfile, arg, charmap, repertoire, collate);
+          break;
+
+        case tok_order_end:
+          if (state != 1)
+            goto err_label;
+          state = 2;
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_reorder_after:
+          if (state != 2 && state != 3)
+            goto err_label;
+          state = 3;
+          /* XXX get symbol */
+          break;
+
+        case tok_reorder_end:
+          if (state != 3)
+            goto err_label;
+          state = 4;
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_bsymbol:
+          if (state != 1 && state != 3)
+            goto err_label;
+
+          /* The symbol is the token which started this line, i.e. NOW.
+             ARG belongs to whatever statement was handled before and
+             may not even have been set yet.  */
+          if (state == 3)
+            {
+              /* It is possible that we already have this collation sequence.
+                 In this case we move the entry.  */
+              struct element_t *seqp;
+
+              if (find_entry (&collate->seq_table, now->val.str.startmb,
+                              now->val.str.lenmb, (void **) &seqp) == 0)
+                {
+                  /* Remove the entry from the old position.  */
+                  if (seqp->last == NULL)
+                    collate->start = seqp->next;
+                  else
+                    seqp->last->next = seqp->next;
+                  if (seqp->next != NULL)
+                    seqp->next->last = seqp->last;
+
+                  /* We also have to check whether this entry is the
+                     first or last of a section.  */
+                  if (seqp->section->first == seqp)
+                    {
+                      if (seqp->section->first == seqp->section->last)
+                        /* This section has no content anymore.  */
+                        seqp->section->first = seqp->section->last = NULL;
+                      else
+                        seqp->section->first = seqp->next;
+                    }
+                  else if (seqp->section->last == seqp)
+                    seqp->section->last = seqp->last;
+
+                  seqp->last = seqp->next = NULL;
+                }
+            }
+
+          /* Now insert in the new place.  */
+          insert_value (ldfile, now, charmap, repertoire, collate);
+          break;
+
+        case tok_undefined:
+          if (state != 1)
+            goto err_label;
+          /* XXX handle UNDEFINED weight */
+          break;
+
+        case tok_ellipsis3:
+          if (state != 1 && state != 3)
+            goto err_label;
+
+          was_ellipsis = 1;
+          /* XXX Read the remainder of the line and remember what are
+             the weights.  */
+          break;
+
+        case tok_end:
+          /* Next we assume `LC_COLLATE'.  */
+          if (state == 0)
+            /* We must either see a copy statement or have ordering values.  */
+            lr_error (ldfile, _("%s: empty category description not allowed"),
+                      "LC_COLLATE");
+          else if (state == 1)
+            lr_error (ldfile, _("%s: missing `order_end' keyword"),
+                      "LC_COLLATE");
+          else if (state == 3)
+            error (0, 0, _("%s: missing `reorder-end' keyword"),
+                   "LC_COLLATE");
+          arg = lr_token (ldfile, charmap, NULL);
+          if (arg->tok == tok_eof)
+            break;
+          if (arg->tok == tok_eol)
+            lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
+          else if (arg->tok != tok_lc_collate)
+            lr_error (ldfile, _("\
+%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
+          lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
+          return;
+
+        default:
+        err_label:
+          SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
+        }
+
+      /* Prepare for the next round.  */
+      now = lr_token (ldfile, charmap, NULL);
+      nowtok = now->tok;
+    }
+
+  /* When we come here we reached the end of the file.  */
+  lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
+}
+
+
+#if 0
/* What kind of symbols get defined? */
enum coll_symbol
@@ -75,7 +1077,8 @@ typedef struct patch_t
typedef struct element_t
{
- const wchar_t *name;
+ const char *namemb;
+ const uint32_t *namewc;
unsigned int this_weight;
struct element_t *next;
@@ -95,12 +1098,12 @@ struct locale_collate_t
hash_table elements;
struct obstack element_mem;
- /* The result table. */
- hash_table result;
+ /* The result tables. */
+ hash_table resultmb;
+ hash_table resultwc;
/* Sorting rules given in order_start line. */
- u_int32_t nrules;
- u_int32_t nrules_max;
+ uint32_t nrules;
enum coll_sort_rule *rules;
/* Used while recognizing symbol composed of multiple tokens
@@ -114,20 +1117,12 @@ struct locale_collate_t
/* Was lastline ellipsis? */
int was_ellipsis;
/* Value of last entry if was character. */
- wchar_t last_char;
+ uint32_t last_char;
/* Current element. */
element_t *current_element;
/* What kind of symbol is current element. */
enum coll_symbol kind;
- /* While collecting the weights we need some temporary space. */
- unsigned int current_order;
- int *weight_cnt;
- unsigned int weight_idx;
- unsigned int *weight;
- size_t nweight;
- size_t nweight_max;
-
/* Patch lists. */
patch_t *current_patch;
patch_t *all_patches;
@@ -135,6 +1130,10 @@ struct locale_collate_t
/* Room for the UNDEFINED information. */
element_t undefined;
unsigned int undefined_len;
+
+ /* Script information. */
+ const char **scripts;
+ unsigned int nscripts;
};
@@ -142,25 +1141,22 @@ struct locale_collate_t
extern int verbose;
-void *xmalloc (size_t __n);
-void *xrealloc (void *__p, size_t __n);
-
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
-void
-collate_startup (struct linereader *lr, struct localedef_t *locale,
- struct charset_t *charset)
-{
- struct locale_collate_t *collate;
+/* Prototypes for local functions. */
+static void collate_startup (struct linereader *ldfile,
+ struct localedef_t *locale,
+ struct charmap_t *charmap, int ignore_content);
- /* We have a definition for LC_COLLATE. */
- copy_posix.mask &= ~(1 << LC_COLLATE);
- /* It is important that we always use UCS4 encoding for strings now. */
- encoding_method = ENC_UCS4;
+static void
+collate_startup (struct linereader *ldfile, struct localedef_t *locale,
+ struct charmap_t *charset, int ignore_content)
+{
+ struct locale_collate_t *collate;
/* Allocate the needed room. */
locale->categories[LC_COLLATE].collate = collate =
@@ -196,12 +1192,14 @@ collate_startup (struct linereader *lr, struct localedef_t *locale,
/* This tells us no UNDEFINED entry was found until now. */
memset (&collate->undefined, '\0', sizeof (collate->undefined));
- lr->translate_strings = 0;
+ ldfile->translate_strings = 0;
+ ldfile->return_widestr = 0;
}
void
-collate_finish (struct localedef_t *locale, struct charset_t *charset)
+collate_finish (struct localedef_t *locale, struct charset_t *charset,
+ struct repertoire_t *repertoire)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
patch_t *patch;
@@ -211,7 +1209,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
correctly filled. */
for (patch = collate->all_patches; patch != NULL; patch = patch->next)
{
- wchar_t wch;
+ uint32_t wch;
size_t toklen = strlen (patch->token);
void *ptmp;
unsigned int value = 0;
@@ -221,7 +1219,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
{
element_t *runp;
- if (find_entry (&collate->result, &wch, sizeof (wchar_t),
+ if (find_entry (&collate->result, &wch, sizeof (uint32_t),
(void *) &runp) < 0)
runp = NULL;
for (; runp != NULL; runp = runp->next)
@@ -262,9 +1260,9 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
|* XXX We should test whether really an unspecified character *|
|* exists before giving the message. *|
\**************************************************************/
- u_int32_t weight;
+ uint32_t weight;
- if (/* XXX Remove the 0 & */ 0 && !be_quiet)
+ if (!be_quiet)
error (0, 0, _("no definition of `UNDEFINED'"));
collate->undefined.ordering_len = collate->nrules;
@@ -272,7 +1270,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
for (cnt = 0; cnt < collate->nrules; ++cnt)
{
- u_int32_t one = 1;
+ uint32_t one = 1;
obstack_grow (&collate->element_mem, &one, sizeof (one));
}
@@ -282,7 +1280,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
collate->undefined.ordering = obstack_finish (&collate->element_mem);
}
- collate->undefined_len = 2; /* For the name: 1 x wchar_t + L'\0'. */
+ collate->undefined_len = 2; /* For the name: 1 x uint32_t + L'\0'. */
for (cnt = 0; cnt < collate->nrules; ++cnt)
collate->undefined_len += 1 + collate->undefined.ordering[cnt];
}
@@ -291,40 +1289,40 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
void
collate_output (struct localedef_t *locale, struct charset_t *charset,
- const char *output_path)
+ struct repertoire_t *repertoire, const char *output_path)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- u_int32_t table_size, table_best, level_best, sum_best;
+ uint32_t table_size, table_best, level_best, sum_best;
void *last;
element_t *pelem;
- wchar_t *name;
+ uint32_t *name;
size_t len;
const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
struct iovec iov[2 + nelems];
struct locale_file data;
- u_int32_t idx[nelems];
+ uint32_t idx[nelems];
struct obstack non_simple;
struct obstack string_pool;
size_t cnt, entry_size;
- u_int32_t undefined_offset = UINT_MAX;
- u_int32_t *table, *extra, *table2, *extra2;
+ uint32_t undefined_offset = UINT_MAX;
+ uint32_t *table, *extra, *table2, *extra2;
size_t extra_len;
- u_int32_t element_hash_tab_size;
- u_int32_t *element_hash_tab;
- u_int32_t *element_hash_tab_ob;
- u_int32_t element_string_pool_size;
+ uint32_t element_hash_tab_size;
+ uint32_t *element_hash_tab;
+ uint32_t *element_hash_tab_ob;
+ uint32_t element_string_pool_size;
char *element_string_pool;
- u_int32_t element_value_size;
- wchar_t *element_value;
- wchar_t *element_value_ob;
- u_int32_t symbols_hash_tab_size;
- u_int32_t *symbols_hash_tab;
- u_int32_t *symbols_hash_tab_ob;
- u_int32_t symbols_string_pool_size;
+ uint32_t element_value_size;
+ uint32_t *element_value;
+ uint32_t *element_value_ob;
+ uint32_t symbols_hash_tab_size;
+ uint32_t *symbols_hash_tab;
+ uint32_t *symbols_hash_tab_ob;
+ uint32_t symbols_string_pool_size;
char *symbols_string_pool;
- u_int32_t symbols_class_size;
- u_int32_t *symbols_class;
- u_int32_t *symbols_class_ob;
+ uint32_t symbols_class_size;
+ uint32_t *symbols_class;
+ uint32_t *symbols_class_ob;
hash_table *hash_tab;
unsigned int dummy_weights[collate->nrules + 1];
@@ -382,29 +1380,29 @@ Computing table size for collation information might take a while..."),
iov[1].iov_len = sizeof (idx);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u_int32_t);
+ iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t);
- table = (u_int32_t *) alloca (collate->nrules * sizeof (u_int32_t));
+ table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t));
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
- = collate->nrules * sizeof (u_int32_t);
+ = collate->nrules * sizeof (uint32_t);
/* Another trick here. Describing the collation method needs only a
few bits (3, to be exact). But the binary file should be
accessible by machines with both endianesses and so we store both
forms in the same word. */
for (cnt = 0; cnt < collate->nrules; ++cnt)
- table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]);
+ table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u_int32_t);
+ iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
- = sizeof (u_int32_t);
+ = sizeof (uint32_t);
entry_size = 1 + MAX (collate->nrules, 2);
- table = (u_int32_t *) alloca (table_best * level_best * entry_size
+ table = (uint32_t *) alloca (table_best * level_best * entry_size
* sizeof (table[0]));
memset (table, '\0', table_best * level_best * entry_size
* sizeof (table[0]));
@@ -413,7 +1411,7 @@ Computing table size for collation information might take a while..."),
/* Macros for inserting in output table. */
#define ADD_VALUE(expr) \
do { \
- u_int32_t to_write = (u_int32_t) expr; \
+ uint32_t to_write = (uint32_t) expr; \
obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
} while (0)
@@ -424,7 +1422,7 @@ Computing table size for collation information might take a while..."),
ADD_VALUE (len); \
\
wlen = wcslen (pelem->name); \
- obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
+ obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \
\
idx = collate->nrules; \
for (cnt = 0; cnt < collate->nrules; ++cnt) \
@@ -448,14 +1446,14 @@ Computing table size for collation information might take a while..."),
table[(level * table_best + slot) * entry_size + 1] \
= FORWARD_CHAR; \
table[(level * table_best + slot) * entry_size + 2] \
- = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
+ = obstack_object_size (&non_simple) / sizeof (uint32_t); \
\
/* Here we have to construct the non-simple table entry. First \
compute the total length of this entry. */ \
for (runp = (pelem); runp != NULL; runp = runp->next) \
if (runp->ordering != NULL) \
{ \
- u_int32_t value; \
+ uint32_t value; \
size_t cnt; \
\
value = 1 + wcslen (runp->name) + 1; \
@@ -491,7 +1489,7 @@ Computing table size for collation information might take a while..."),
ADD_VALUE (collate->undefined.ordering[cnt]); \
for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
{ \
- if ((wchar_t) collate->undefined.ordering[idx] \
+ if ((uint32_t) collate->undefined.ordering[idx] \
== ELLIPSIS_CHAR) \
ADD_VALUE ((pelem)->name[0]); \
else \
@@ -543,14 +1541,15 @@ Computing table size for collation information might take a while..."),
{
/* We have to fill in the information from the UNDEFINED
entry. */
- table[cnt * entry_size] = (u_int32_t) cnt;
+ table[cnt * entry_size] = (uint32_t) cnt;
if (collate->undefined.ordering_len == collate->nrules)
{
size_t inner;
for (inner = 0; inner < collate->nrules; ++inner)
- if ((wchar_t)collate->undefined.ordering[collate->nrules + inner]
+ if ((uint32_t)collate->undefined.ordering[collate->nrules
+ + inner]
== ELLIPSIS_CHAR)
table[cnt * entry_size + 1 + inner] = cnt;
else
@@ -609,8 +1608,6 @@ Computing table size for collation information might take a while..."),
size_t idx, cnt;
undefined_offset = obstack_object_size (&non_simple);
- assert (undefined_offset % sizeof (u_int32_t) == 0);
- undefined_offset /= sizeof (u_int32_t);
idx = collate->nrules;
for (cnt = 0; cnt < collate->nrules; ++cnt)
@@ -625,19 +1622,19 @@ Computing table size for collation information might take a while..."),
/* Finish the extra block. */
extra_len = obstack_object_size (&non_simple);
- extra = (u_int32_t *) obstack_finish (&non_simple);
- assert ((extra_len % sizeof (u_int32_t)) == 0);
+ extra = (uint32_t *) obstack_finish (&non_simple);
+ assert ((extra_len % sizeof (uint32_t)) == 0);
/* Now we have to build the two array for the other byte ordering. */
- table2 = (u_int32_t *) alloca (table_best * level_best * entry_size
+ table2 = (uint32_t *) alloca (table_best * level_best * entry_size
* sizeof (table[0]));
- extra2 = (u_int32_t *) alloca (extra_len);
+ extra2 = (uint32_t *) alloca (extra_len);
for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
- table2[cnt] = SWAPU32 (table[cnt]);
+ table2[cnt] = bswap_32 (table[cnt]);
- for (cnt = 0; cnt < extra_len / sizeof (u_int32_t); ++cnt)
- extra2[cnt] = SWAPU32 (extra[cnt]);
+ for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt)
+ extra2[cnt] = bswap_32 (extra[cnt]);
/* We need a simple hashing table to get a collation-element->chars
mapping. We again use internal hashing using a secondary hashing
@@ -687,9 +1684,9 @@ Computing table size for collation information might take a while..."),
element_hash_tab_size = 7;
element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
memset (element_hash_tab, '\377', (2 * element_hash_tab_size
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
ptr = NULL;
while (iterate_table (&collate->elements, &ptr, (const void **) &key,
@@ -698,7 +1695,7 @@ Computing table size for collation information might take a while..."),
size_t hash_val = hash_string (key, keylen);
size_t idx = hash_val % element_hash_tab_size;
- if (element_hash_tab[2 * idx] != (~((u_int32_t) 0)))
+ if (element_hash_tab[2 * idx] != (~((uint32_t) 0)))
{
/* We need the second hashing function. */
size_t c = 1 + (hash_val % (element_hash_tab_size - 2));
@@ -708,16 +1705,16 @@ Computing table size for collation information might take a while..."),
idx -= element_hash_tab_size - c;
else
idx += c;
- while (element_hash_tab[2 * idx] != (~((u_int32_t) 0)));
+ while (element_hash_tab[2 * idx] != (~((uint32_t) 0)));
}
element_hash_tab[2 * idx] = obstack_object_size (&non_simple);
element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool)
- / sizeof (wchar_t));
+ / sizeof (uint32_t));
obstack_grow0 (&non_simple, key, keylen);
obstack_grow (&string_pool, data->name,
- (wcslen (data->name) + 1) * sizeof (wchar_t));
+ (wcslen (data->name) + 1) * sizeof (uint32_t));
}
if (obstack_object_size (&non_simple) % 4 != 0)
@@ -732,18 +1729,13 @@ Computing table size for collation information might take a while..."),
/* Create the tables for the other byte order. */
element_hash_tab_ob = obstack_alloc (&non_simple,
(2 * element_hash_tab_size
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt)
- element_hash_tab_ob[cnt] = SWAPU32 (element_hash_tab[cnt]);
+ element_hash_tab_ob[cnt] = bswap_32 (element_hash_tab[cnt]);
element_value_ob = obstack_alloc (&string_pool, element_value_size);
- if (sizeof (wchar_t) != 4)
- {
- fputs ("sizeof (wchar_t) != 4 currently not handled", stderr);
- abort ();
- }
for (cnt = 0; cnt < element_value_size / 4; ++cnt)
- element_value_ob[cnt] = SWAPU32 (element_value[cnt]);
+ element_value_ob[cnt] = bswap_32 (element_value[cnt]);
}
/* Store collation elements as map to collation class. There are
@@ -757,9 +1749,9 @@ Computing table size for collation information might take a while..."),
+ collate->elements.filled
+ collate->symbols.filled)) / 3);
symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
/* Now fill the array. First the symbols from the character set,
then the collation elements and last the collation symbols. */
@@ -777,29 +1769,29 @@ Computing table size for collation information might take a while..."),
{
size_t hash_val;
size_t idx;
- u_int32_t word;
+ uint32_t word;
unsigned int *weights;
if (hash_tab == &charset->char_table
|| hash_tab == &collate->elements)
{
element_t *lastp, *firstp;
- wchar_t dummy_name[2];
- const wchar_t *name;
+ uint32_t dummy_name[2];
+ const uint32_t *name;
size_t name_len;
if (hash_tab == &charset->char_table)
{
- dummy_name[0] = (wchar_t) ((unsigned long int) data);
+ dummy_name[0] = (uint32_t) ((unsigned long int) data);
dummy_name[1] = L'\0';
name = dummy_name;
- name_len = sizeof (wchar_t);
+ name_len = sizeof (uint32_t);
}
else
{
element_t *elemp = (element_t *) data;
name = elemp->name;
- name_len = wcslen (name) * sizeof (wchar_t);
+ name_len = wcslen (name) * sizeof (uint32_t);
}
/* First check whether this character is used at all. */
@@ -815,8 +1807,6 @@ Computing table size for collation information might take a while..."),
lastp = firstp;
while (lastp->next != NULL && wcscmp (name, lastp->name))
lastp = lastp->next;
- if (lastp->ordering == NULL)
- lastp = &collate->undefined;
}
weights = lastp->ordering;
@@ -835,7 +1825,7 @@ Computing table size for collation information might take a while..."),
hash_val = hash_string (key, keylen);
idx = hash_val % symbols_hash_tab_size;
- if (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0)))
+ if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)))
{
/* We need the second hashing function. */
size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2));
@@ -845,23 +1835,23 @@ Computing table size for collation information might take a while..."),
idx -= symbols_hash_tab_size - c;
else
idx += c;
- while (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0)));
+ while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)));
}
symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool);
symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple)
- / sizeof (u_int32_t));
+ / sizeof (uint32_t));
obstack_grow0 (&string_pool, key, keylen);
/* Adding the first weight looks complicated. We have to deal
with the kind it is stored and with the fact that original
- form uses `unsigned int's while we need `u_int32_t' here. */
+ form uses `unsigned int's while we need `uint32_t' here. */
word = weights[0];
- obstack_grow (&non_simple, &word, sizeof (u_int32_t));
+ obstack_grow (&non_simple, &word, sizeof (uint32_t));
for (cnt = 0; cnt < weights[0]; ++cnt)
{
word = weights[collate->nrules + cnt];
- obstack_grow (&non_simple, &word, sizeof (u_int32_t));
+ obstack_grow (&non_simple, &word, sizeof (uint32_t));
}
}
@@ -884,13 +1874,13 @@ Computing table size for collation information might take a while..."),
/* Generate tables with other byte order. */
symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
- * sizeof (u_int32_t)));
+ * sizeof (uint32_t)));
for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt)
- symbols_hash_tab_ob[cnt] = SWAPU32 (symbols_hash_tab[cnt]);
+ symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]);
symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size);
for (cnt = 0; cnt < symbols_class_size / 4; ++cnt)
- symbols_class_ob[cnt] = SWAPU32 (symbols_class[cnt]);
+ symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]);
/* Store table addresses and lengths. */
@@ -925,34 +1915,34 @@ Computing table size for collation information might take a while..."),
#endif
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u_int32_t);
+ iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base
= &element_hash_tab_size;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len
- = sizeof (u_int32_t);
+ = sizeof (uint32_t);
#if __BYTE_ORDER == __BIG_ENDIAN
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
= element_hash_tab;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
- = 2 * element_hash_tab_size * sizeof (u_int32_t);
+ = 2 * element_hash_tab_size * sizeof (uint32_t);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
= element_hash_tab_ob;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
- = 2 * element_hash_tab_size * sizeof (u_int32_t);
+ = 2 * element_hash_tab_size * sizeof (uint32_t);
#else
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
= element_hash_tab;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
- = 2 * element_hash_tab_size * sizeof (u_int32_t);
+ = 2 * element_hash_tab_size * sizeof (uint32_t);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
= element_hash_tab_ob;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
- = 2 * element_hash_tab_size * sizeof (u_int32_t);
+ = 2 * element_hash_tab_size * sizeof (uint32_t);
#endif
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base
@@ -985,28 +1975,28 @@ Computing table size for collation information might take a while..."),
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base
= &symbols_hash_tab_size;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len
- = sizeof (u_int32_t);
+ = sizeof (uint32_t);
#if __BYTE_ORDER == __BIG_ENDIAN
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
= symbols_hash_tab;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
+ = 2 * symbols_hash_tab_size * sizeof (uint32_t);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
= symbols_hash_tab_ob;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
+ = 2 * symbols_hash_tab_size * sizeof (uint32_t);
#else
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
= symbols_hash_tab;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
+ = 2 * symbols_hash_tab_size * sizeof (uint32_t);
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
= symbols_hash_tab_ob;
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
+ = 2 * symbols_hash_tab_size * sizeof (uint32_t);
#endif
iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base
@@ -1048,58 +2038,64 @@ Computing table size for collation information might take a while..."),
}
-void
-collate_element_to (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
+static int
+collate_element_to (struct linereader *ldfile,
+ struct locale_collate_t *collate,
+ struct token *code, struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- unsigned int value;
+ struct charseq *seq;
+ uint32_t value;
void *not_used;
- if (collate->combine_token != NULL)
+ seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len);
+ if (seq != NULL)
{
- free ((void *) collate->combine_token);
- collate->combine_token = NULL;
+ lr_error (ldfile, _("symbol for multicharacter collating element "
+ "`%.*s' duplicates symbolic name in charmap"),
+ (int) code->val.str.len, code->val.str.start);
+ return 1;
}
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
- if ((wchar_t) value != ILLEGAL_CHAR_VALUE)
+ value = repertoire_find_value (repertoire, code->val.str.start,
+ code->val.str.len);
+ if (value != ILLEGAL_CHAR_VALUE)
{
- lr_error (lr, _("symbol for multicharacter collating element "
- "`%.*s' duplicates symbolic name in charset"),
+ lr_error (ldfile, _("symbol for multicharacter collating element "
+ "`%.*s' duplicates symbolic name in repertoire"),
(int) code->val.str.len, code->val.str.start);
- return;
+ return 1;
}
if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
&not_used) >= 0)
{
- lr_error (lr, _("symbol for multicharacter collating element "
- "`%.*s' duplicates element definition"),
+ lr_error (ldfile, _("symbol for multicharacter collating element "
+ "`%.*s' duplicates other element definition"),
(int) code->val.str.len, code->val.str.start);
- return;
+ return 1;
}
- if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
+ if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
&not_used) >= 0)
{
- lr_error (lr, _("symbol for multicharacter collating element "
+ lr_error (ldfile, _("symbol for multicharacter collating element "
"`%.*s' duplicates symbol definition"),
(int) code->val.str.len, code->val.str.start);
- return;
+ return 1;
}
- collate->combine_token = code->val.str.start;
- collate->combine_token_len = code->val.str.len;
+ return 0;
}
-void
-collate_element_from (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
+static void
+collate_element_from (struct linereader *ldfile,
+ struct locale_collate_t *collate,
+ const char *to_str, struct token *code,
+ struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
element_t *elemp, *runp;
/* CODE is a string. */
@@ -1108,33 +2104,26 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale,
/* We have to translate the string. It may contain <...> character
names. */
- elemp->name = (wchar_t *) translate_string (code->val.str.start, charset);
+ elemp->namemb = code->val.str.startmb;
+ elemp->namewc = code->val.str.startwc;
elemp->this_weight = 0;
elemp->ordering = NULL;
elemp->ordering_len = 0;
- free (code->val.str.start);
-
- if (elemp->name == NULL)
+ if (elemp->namemb == NULL && elemp->namewc == NULL)
{
- /* At least one character in the string is not defined. We simply
- do nothing. */
+ /* The string contains characters which are neither in the charmap nor
+ in the repertoire. Ignore the string. */
if (verbose)
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
`from' string in collation element declaration contains unknown character"));
return;
}
- if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0')
- {
- lr_error (lr, _("illegal collation element"));
- return;
- }
-
/* The entries in the linked lists of RESULT are sorting in
descending order. The order is important for the `strcoll' and
`wcscoll' functions. */
- if (find_entry (&collate->result, elemp->name, sizeof (wchar_t),
+ if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
(void *) &runp) >= 0)
{
/* We already have an entry with this key. Check whether it is
@@ -1144,7 +2133,49 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale,
do
{
- cmpres = wcscmp (elemp->name, runp->name);
+ cmpres = wcscmp (elemp->namewc, runp->namewc);
+ if (cmpres <= 0)
+ break;
+ prevp = runp;
+ }
+ while ((runp = runp->next) != NULL);
+
+ if (cmpres == 0)
+ lr_error (ldfile, _("\
+duplicate collating element definition (repertoire)"));
+ else
+ {
+ elemp->next = runp;
+ if (prevp == NULL)
+ {
+ if (set_entry (&collate->resultwc, elemp->namewc,
+ sizeof (uint32_t), elemp) < 0)
+ error (EXIT_FAILURE, 0, _("\
+error while inserting collation element into hash table"));
+ }
+ else
+ prevp->next = elemp;
+ }
+ }
+ else
+ {
+ elemp->next = NULL;
+ if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
+ elemp) < 0)
+ error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
+ }
+
+ /* Now also insert the element definition in the multibyte table. */
+ if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0)
+ {
+ /* We already have an entry with this key. Check whether it is
+ identical. */
+ element_t *prevp = NULL;
+ int cmpres;
+
+ do
+ {
+ cmpres = strcmp (elemp->namemb, runp->namemb);
if (cmpres <= 0)
break;
prevp = runp;
@@ -1152,14 +2183,14 @@ collate_element_from (struct linereader *lr, struct localedef_t *locale,
while ((runp = runp->next) != NULL);
if (cmpres == 0)
- lr_error (lr, _("duplicate collating element definition"));
+ lr_error (ldfile, _("\
+duplicate collating element definition (charmap)"));
else
{
elemp->next = runp;
if (prevp == NULL)
{
- if (set_entry (&collate->result, elemp->name, sizeof (wchar_t),
- elemp) < 0)
+ if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
error (EXIT_FAILURE, 0, _("\
error while inserting collation element into hash table"));
}
@@ -1170,32 +2201,41 @@ error while inserting collation element into hash table"));
else
{
elemp->next = NULL;
- if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp)
- < 0)
+ if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
}
- if (insert_entry (&collate->elements, collate->combine_token,
- collate->combine_token_len, (void *) elemp) < 0)
- lr_error (lr, _("cannot insert new collating symbol definition: %s"),
+ /* Finally install the mapping from the `to'-name to the `from'-name. */
+ if (insert_entry (&collate->elements, to_str, strlen (to_str),
+ (void *) elemp) < 0)
+ lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
strerror (errno));
}
-void
-collate_symbol (struct linereader *lr, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
+static void
+collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate,
+ struct token *code, struct charmap_t *charmap,
+ struct repertoire_t *repertoire)
{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- wchar_t value;
+ uint32_t value;
+ struct charseq *seq;
void *not_used;
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
+ seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len);
+ if (seq != NULL)
+ {
+ lr_error (ldfile, _("symbol for multicharacter collating element "
+ "`%.*s' duplicates symbolic name in charmap"),
+ (int) code->val.str.len, code->val.str.start);
+ return;
+ }
+
+ value = repertoire_find_value (repertoire, code->val.str.start, code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
- lr_error (lr, _("symbol for multicharacter collating element "
- "`%.*s' duplicates symbolic name in charset"),
+ lr_error (ldfile, _("symbol for multicharacter collating element "
+ "`%.*s' duplicates symbolic name in repertoire"),
(int) code->val.str.len, code->val.str.start);
return;
}
@@ -1203,7 +2243,7 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale,
if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
&not_used) >= 0)
{
- lr_error (lr, _("symbol for multicharacter collating element "
+ lr_error (ldfile, _("symbol for multicharacter collating element "
"`%.*s' duplicates element definition"),
(int) code->val.str.len, code->val.str.start);
return;
@@ -1212,7 +2252,7 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale,
if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
&not_used) >= 0)
{
- lr_error (lr, _("symbol for multicharacter collating element "
+ lr_error (ldfile, _("symbol for multicharacter collating element "
"`%.*s' duplicates other symbol definition"),
(int) code->val.str.len, code->val.str.start);
return;
@@ -1220,13 +2260,13 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale,
if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
(void *) 0) < 0)
- lr_error (lr, _("cannot insert new collating symbol definition: %s"),
+ lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
strerror (errno));
}
void
-collate_new_order (struct linereader *lr, struct localedef_t *locale,
+collate_new_order (struct linereader *ldfile, struct localedef_t *locale,
enum coll_sort_rule sort_rule)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
@@ -1245,7 +2285,7 @@ collate_new_order (struct linereader *lr, struct localedef_t *locale,
void
-collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
+collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
@@ -1264,13 +2304,13 @@ collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
int
-collate_order_elem (struct linereader *lr, struct localedef_t *locale,
+collate_order_elem (struct linereader *ldfile, struct localedef_t *locale,
struct token *code, struct charset_t *charset)
{
- const wchar_t zero = L'\0';
+ const uint32_t zero = L'\0';
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
int result = 0;
- wchar_t value;
+ uint32_t value;
void *tmp;
unsigned int i;
@@ -1286,7 +2326,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
collate->kind = character;
- if (find_entry (&collate->result, &value, sizeof (wchar_t),
+ if (find_entry (&collate->result, &value, sizeof (uint32_t),
(void *) &firstp) < 0)
firstp = lastp = NULL;
else
@@ -1299,9 +2339,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
if (lastp->name[0] == value && lastp->name[1] == L'\0')
{
- lr_error (lr, _("duplicate definition for character `%.*s'"),
+ lr_error (ldfile,
+ _("duplicate definition for character `%.*s'"),
(int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
result = -1;
break;
}
@@ -1315,7 +2356,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
obstack_grow (&collate->element_mem, &zero, sizeof (zero));
collate->current_element->name =
- (const wchar_t *) obstack_finish (&collate->element_mem);
+ (const uint32_t *) obstack_finish (&collate->element_mem);
collate->current_element->this_weight = ++collate->order_cnt;
@@ -1323,10 +2364,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
if (firstp == NULL)
{
- if (insert_entry (&collate->result, &value, sizeof (wchar_t),
+ if (insert_entry (&collate->result, &value, sizeof (uint32_t),
(void *) collate->current_element) < 0)
{
- lr_error (lr, _("cannot insert collation element `%.*s'"),
+ lr_error (ldfile, _("cannot insert collation element `%.*s'"),
(int) code->val.str.len, code->val.str.start);
exit (4);
}
@@ -1341,10 +2382,10 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
if (collate->current_element->this_weight != 0)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
collation element `%.*s' appears more than once: ignore line"),
(int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
result = -1;
break;
}
@@ -1359,10 +2400,10 @@ collation element `%.*s' appears more than once: ignore line"),
if ((unsigned long int) tmp != 0ul)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
collation symbol `%.*s' appears more than once: ignore line"),
(int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
result = -1;
break;
}
@@ -1372,16 +2413,16 @@ collation symbol `%.*s' appears more than once: ignore line"),
if (set_entry (&collate->symbols, code->val.str.start,
code->val.str.len, (void *) order) < 0)
{
- lr_error (lr, _("cannot process order specification"));
+ lr_error (ldfile, _("cannot process order specification"));
exit (4);
}
}
else
{
if (verbose)
- lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+ lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
(int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
result = -1;
}
@@ -1395,7 +2436,7 @@ collation symbol `%.*s' appears more than once: ignore line"),
case tok_ellipsis:
if (collate->was_ellipsis)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
two lines in a row containing `...' are not allowed"));
result = -1;
}
@@ -1403,9 +2444,9 @@ two lines in a row containing `...' are not allowed"));
{
/* An ellipsis requires the previous line to be an
character definition. */
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
line before ellipsis does not contain definition for character constant"));
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
result = -1;
}
else
@@ -1424,21 +2465,21 @@ line before ellipsis does not contain definition for character constant"));
{
if (collate->kind != character)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
line after ellipsis must contain character definition"));
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
result = -1;
}
else if (collate->last_char > value)
{
- lr_error (lr, _("end point of ellipsis range is bigger then start"));
- lr_ignore_rest (lr, 0);
+ lr_error (ldfile, _("end point of ellipsis range is bigger then start"));
+ lr_ignore_rest (ldfile, 0);
result = -1;
}
else
{
/* We can fill the arrays with the information we need. */
- wchar_t name[2];
+ uint32_t name[2];
unsigned int *data;
size_t *ptr;
size_t cnt;
@@ -1450,9 +2491,6 @@ line after ellipsis must contain character definition"));
* sizeof (unsigned int));
ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
- if (data == NULL || ptr == NULL)
- error (4, 0, _("memory exhausted"));
-
/* Prepare data. Because the characters covered by an
ellipsis all have equal values we prepare the data once
and only change the variable number (if there are any).
@@ -1470,7 +2508,7 @@ line after ellipsis must contain character definition"));
data[collate->nrules + cnt] = collate->weight[cnt];
for (cnt = 0; cnt < collate->nrules; ++cnt)
- if ((wchar_t) data[ptr[cnt]] != ELLIPSIS_CHAR)
+ if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR)
ptr[cnt] = 0;
while (name[0] <= value)
@@ -1479,12 +2517,9 @@ line after ellipsis must contain character definition"));
pelem = (element_t *) obstack_alloc (&collate->element_mem,
sizeof (element_t));
- if (pelem == NULL)
- error (4, 0, _("memory exhausted"));
-
pelem->name
- = (const wchar_t *) obstack_copy (&collate->element_mem,
- name, 2 * sizeof (wchar_t));
+ = (const uint32_t *) obstack_copy (&collate->element_mem,
+ name, 2 * sizeof (uint32_t));
pelem->this_weight = ++collate->order_cnt;
pelem->ordering_len = collate->nweight;
@@ -1500,17 +2535,17 @@ line after ellipsis must contain character definition"));
pelem->ordering[ptr[cnt]] = pelem->this_weight;
/* Insert new entry into result table. */
- if (find_entry (&collate->result, name, sizeof (wchar_t),
+ if (find_entry (&collate->result, name, sizeof (uint32_t),
(void *) &pelem->next) >= 0)
{
- if (set_entry (&collate->result, name, sizeof (wchar_t),
+ if (set_entry (&collate->result, name, sizeof (uint32_t),
(void *) pelem) < 0)
error (4, 0, _("cannot insert into result table"));
}
else
{
pelem->next = NULL;
- if (insert_entry (&collate->result, name, sizeof (wchar_t),
+ if (insert_entry (&collate->result, name, sizeof (uint32_t),
(void *) pelem) < 0)
error (4, 0, _("cannot insert into result table"));
}
@@ -1533,12 +2568,12 @@ line after ellipsis must contain character definition"));
int
-collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
+collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale,
struct token *code, struct charset_t *charset)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
unsigned int here_weight;
- wchar_t value;
+ uint32_t value;
void *tmp;
assert (code->tok == tok_bsymbol);
@@ -1549,7 +2584,7 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
{
element_t *runp;
- if (find_entry (&collate->result, &value, sizeof (wchar_t),
+ if (find_entry (&collate->result, &value, sizeof (uint32_t),
(void *)&runp) < 0)
runp = NULL;
@@ -1574,9 +2609,9 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
else
{
if (verbose)
- lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+ lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
(int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
return -1;
}
@@ -1584,9 +2619,9 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
weight. */
if (collate->kind == symbol)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
specification of sorting weight for collation symbol does not make sense"));
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
return -1;
}
@@ -1606,8 +2641,8 @@ specification of sorting weight for collation symbol does not make sense"));
newp = (patch_t *) obstack_alloc (&collate->element_mem,
sizeof (patch_t));
- newp->fname = lr->fname;
- newp->lineno = lr->lineno;
+ newp->fname = ldfile->fname;
+ newp->lineno = ldfile->lineno;
newp->token = (const char *) obstack_copy0 (&collate->element_mem,
code->val.str.start,
code->val.str.len);
@@ -1624,23 +2659,23 @@ specification of sorting weight for collation symbol does not make sense"));
int
-collate_next_weight (struct linereader *lr, struct localedef_t *locale)
+collate_next_weight (struct linereader *ldfile, struct localedef_t *locale)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
if (collate->kind == symbol)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
specification of sorting weight for collation symbol does not make sense"));
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
return -1;
}
++collate->weight_idx;
if (collate->weight_idx >= collate->nrules)
{
- lr_error (lr, _("too many weights"));
- lr_ignore_rest (lr, 0);
+ lr_error (ldfile, _("too many weights"));
+ lr_ignore_rest (ldfile, 0);
return -1;
}
@@ -1649,7 +2684,7 @@ specification of sorting weight for collation symbol does not make sense"));
int
-collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
+collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale,
struct token *code, struct charset_t *charset)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
@@ -1668,9 +2703,9 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
entry. */
if (collate->kind != ellipsis && collate->kind != undefined)
{
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
`...' must only be used in `...' and `UNDEFINED' entries"));
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
return -1;
}
value = ELLIPSIS_CHAR;
@@ -1691,18 +2726,18 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
{
char *startp = (char *) runp;
char *putp = (char *) runp;
- wchar_t wch;
+ uint32_t wch;
/* Lookup weight for char and store it. */
if (*runp == '<')
{
while (*++runp != '\0' && *runp != '>')
{
- if (*runp == lr->escape_char)
+ if (*runp == ldfile->escape_char)
if (*++runp == '\0')
{
- lr_error (lr, _("unterminated weight name"));
- lr_ignore_rest (lr, 0);
+ lr_error (ldfile, _("unterminated weight name"));
+ lr_ignore_rest (ldfile, 0);
return -1;
}
*putp++ = *runp;
@@ -1712,8 +2747,8 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
if (putp == startp)
{
- lr_error (lr, _("empty weight name: line ignored"));
- lr_ignore_rest (lr, 0);
+ lr_error (ldfile, _("empty weight name: line ignored"));
+ lr_ignore_rest (ldfile, 0);
return -1;
}
@@ -1723,7 +2758,7 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
{
element_t *pelem;
- if (find_entry (&collate->result, &wch, sizeof (wchar_t),
+ if (find_entry (&collate->result, &wch, sizeof (uint32_t),
(void *)&pelem) < 0)
pelem = NULL;
@@ -1749,30 +2784,30 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
else
{
if (verbose)
- lr_error (lr, _("unknown symbol `%.*s': line ignored"),
+ lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
(int) (putp - startp), startp);
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
return -1;
}
}
else
{
element_t *wp;
- wchar_t wch;
+ uint32_t wch;
- if (*runp == lr->escape_char)
+ if (*runp == ldfile->escape_char)
{
static const char digits[] = "0123456789abcdef";
const char *dp;
int base;
++runp;
- if (_tolower (*runp) == 'x')
+ if (tolower (*runp) == 'x')
{
++runp;
base = 16;
}
- else if (_tolower (*runp) == 'd')
+ else if (tolower (*runp) == 'd')
{
++runp;
base = 10;
@@ -1780,19 +2815,19 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
else
base = 8;
- dp = strchr (digits, _tolower (*runp));
+ dp = strchr (digits, tolower (*runp));
if (dp == NULL || (dp - digits) >= base)
{
illegal_char:
- lr_error (lr, _("\
+ lr_error (ldfile, _("\
illegal character constant in string"));
- lr_ignore_rest (lr, 0);
+ lr_ignore_rest (ldfile, 0);
return -1;
}
wch = dp - digits;
++runp;
- dp = strchr (digits, _tolower (*runp));
+ dp = strchr (digits, tolower (*runp));
if (dp == NULL || (dp - digits) >= base)
goto illegal_char;
wch *= base;
@@ -1801,7 +2836,7 @@ illegal character constant in string"));
if (base != 16)
{
- dp = strchr (digits, _tolower (*runp));
+ dp = strchr (digits, tolower (*runp));
if (dp != NULL && (dp - digits < base))
{
wch *= base;
@@ -1811,7 +2846,7 @@ illegal character constant in string"));
}
}
else
- wch = (wchar_t) *runp++;
+ wch = (uint32_t) *runp++;
/* Lookup the weight for WCH. */
if (find_entry (&collate->result, &wch, sizeof (wch),
@@ -1849,8 +2884,8 @@ illegal character constant in string"));
newp = (patch_t *) obstack_alloc (&collate->element_mem,
sizeof (patch_t));
- newp->fname = lr->fname;
- newp->lineno = lr->lineno;
+ newp->fname = ldfile->fname;
+ newp->lineno = ldfile->lineno;
newp->token
= (const char *) obstack_copy0 (&collate->element_mem,
startp, putp - startp);
@@ -1885,7 +2920,7 @@ illegal character constant in string"));
void
-collate_end_weight (struct linereader *lr, struct localedef_t *locale)
+collate_end_weight (struct linereader *ldfile, struct localedef_t *locale)
{
struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
element_t *pelem = collate->current_element;
@@ -1951,3 +2986,239 @@ collate_end_weight (struct linereader *lr, struct localedef_t *locale)
if (collate->kind != undefined)
collate->last_char = pelem->name[0];
}
+
+
+/* The parser for the LC_COLLATE section of the locale definition.
+
+   LDFILE is the input being read and RESULT the locale description whose
+   LC_COLLATE category data is used.  CHARMAP and REPERTOIRE are handed to
+   the tokenizer and to the collation helpers.  IGNORE_CONTENT is passed
+   through unchanged to `handle_copy' and `collate_startup'.
+
+   The function loops over the lines of the category, dispatching on the
+   first token of each line, and only leaves the loop on end of file, at
+   which point it reports a premature end of file.
+
+   NOTE(review): `now' and `nowtok' are not declared in this function;
+   they are assumed to be visible at file scope -- confirm.  */
+void
+read_lc_collate (struct linereader *ldfile, struct localedef_t *result,
+                 struct charmap_t *charmap, struct repertoire_t *repertoire,
+                 int ignore_content)
+{
+  struct locale_collate_t *collate;
+  int did_copy = 0;
+  const char *save_str;
+
+  /* The rest of the line containing `LC_COLLATE' must be free.  */
+  lr_ignore_rest (ldfile, 1);
+
+  now = lr_token (ldfile, charmap, NULL);
+  nowtok = now->tok;
+
+  /* If we see `copy' now we are almost done.  */
+  if (nowtok == tok_copy)
+    {
+      /* NOTE(review): NOWTOK is not refreshed after this call; confirm
+         whether `handle_copy' is expected to consume the line.  */
+      handle_copy (ldfile, charmap, tok_lc_collate, LC_COLLATE, "LC_COLLATE",
+                   ignore_content);
+      did_copy = 1;
+    }
+
+  /* Prepare the data structures.  */
+  collate_startup (ldfile, result, charmap, ignore_content);
+  collate = result->categories[LC_COLLATE].collate;
+
+  while (1)
+    {
+      /* Of course we don't proceed beyond the end of file.  */
+      if (nowtok == tok_eof)
+        break;
+
+      /* Ignore empty lines.  */
+      if (nowtok == tok_eol)
+        {
+          now = lr_token (ldfile, charmap, NULL);
+          nowtok = now->tok;
+          continue;
+        }
+
+      switch (nowtok)
+        {
+        case tok_coll_weight_max:
+          if (did_copy)
+            goto err_label;
+          /* The rest of the line must be a single integer value.  */
+          now = lr_token (ldfile, charmap, NULL);
+          if (now->tok != tok_number)
+            goto err_label;
+          /* We simply forget about the value we just read, the implementation
+             has no fixed limits.  */
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_script:
+          if (did_copy)
+            goto err_label;
+          /* We expect the name of the script in brackets.  */
+          now = lr_token (ldfile, charmap, NULL);
+          if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
+            goto err_label;
+          if (now->tok != tok_bsymbol)
+            {
+              /* Only `tok_ucs4' can get here; such a name is rejected.
+                 The message must refer to the token just read (NOW).  */
+              lr_error (ldfile, _("\
+script name `%s' must not duplicate any known name"),
+                        now->val.str.startmb);
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+          /* Grow the array by one slot before storing the new name.
+             Assumes `scripts' starts out as NULL with `nscripts' zero
+             -- TODO confirm against `collate_startup'.  */
+          collate->scripts = xrealloc (collate->scripts,
+                                       ((collate->nscripts + 1)
+                                        * sizeof (const char *)));
+          collate->scripts[collate->nscripts++] = now->val.str.startmb;
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_collating_element:
+          if (did_copy)
+            goto err_label;
+          /* Get the first argument, a symbol in brackets.  */
+          now = lr_token (ldfile, charmap, NULL);
+          if (now->tok != tok_bsymbol)
+            goto err_label;
+          /* Test it.  */
+          if (collate_element_to (ldfile, collate, now, charmap, repertoire))
+            {
+              /* An error occurred.  */
+              lr_ignore_rest (ldfile, 0);
+              break;
+            }
+          /* Remember the element name before NOW is overwritten by the
+             next call to the tokenizer.  */
+          save_str = now->val.str.startmb;
+          /* Next comes `from'.  */
+          now = lr_token (ldfile, charmap, NULL);
+          if (now->tok != tok_from)
+            goto err_label;
+          /* Now comes a string.  */
+          now = lr_token (ldfile, charmap, repertoire);
+          if (now->tok != tok_string)
+            goto err_label;
+          collate_element_from (ldfile, collate, save_str, now, charmap,
+                                repertoire);
+          /* The rest of the line should be empty.  */
+          lr_ignore_rest (ldfile, 1);
+          break;
+
+        case tok_collating_symbol:
+          if (did_copy)
+            goto err_label;
+          /* Get the argument, a single symbol in brackets.  */
+          now = lr_token (ldfile, charmap, NULL);
+          if (now->tok != tok_bsymbol)
+            goto err_label;
+          collate_symbol (ldfile, collate, now, charmap, repertoire);
+          break;
+
+        case tok_order_start:
+          if (did_copy)
+            goto err_label;
+
+          /* We expect now a scripting symbol or start right away
+             with the order keywords.  Or we have no argument at all,
+             which means `forward'.  */
+          now = lr_token (ldfile, charmap, NULL);
+          if (now->tok == tok_eol)
+            {
+              static enum coll_sort_rule default_rule = sort_forward;
+              /* Use a single `forward' rule.  */
+              collate->nrules = 1;
+              collate->rules = &default_rule;
+            }
+          else
+            {
+              /* XXX We don't recognize the ISO 14651 extensions yet.  */
+              /* NRULES is the index of the rule currently being built,
+                 not the number of rules.  */
+              uint32_t nrules = 0;
+              uint32_t nrules_max = 32;
+              enum coll_sort_rule *rules = alloca (nrules_max
+                                                   * sizeof (*rules));
+              int saw_semicolon = 0;
+
+              memset (rules, '\0', nrules_max * sizeof (*rules));
+              do
+                {
+                  if (now->tok != tok_forward && now->tok != tok_backward
+                      && now->tok != tok_position)
+                    goto err_label;
+
+                  if (saw_semicolon)
+                    {
+                      /* A semicolon starts the next rule.  Enlarge the
+                         array before the index would run past its end.  */
+                      if (nrules + 1 == nrules_max)
+                        {
+                          enum coll_sort_rule *newp
+                            = alloca (nrules_max * 2 * sizeof (*rules));
+
+                          rules = memcpy (newp, rules,
+                                          nrules_max * sizeof (*rules));
+                          memset (&rules[nrules_max], '\0',
+                                  nrules_max * sizeof (*rules));
+                          nrules_max *= 2;
+                        }
+                      ++nrules;
+                    }
+
+                  switch (now->tok)
+                    {
+                    case tok_forward:
+                      if ((rules[nrules] & sort_backward) != 0)
+                        {
+                          lr_error (ldfile, _("\
+`forward' and `backward' order exclude each other"));
+                          lr_ignore_rest (ldfile, 0);
+                          goto error_sort;
+                        }
+                      rules[nrules] |= sort_forward;
+                      break;
+                    case tok_backward:
+                      if ((rules[nrules] & sort_forward) != 0)
+                        {
+                          lr_error (ldfile, _("\
+`forward' and `backward' order exclude each other"));
+                          lr_ignore_rest (ldfile, 0);
+                          goto error_sort;
+                        }
+                      rules[nrules] |= sort_backward;
+                      break;
+                    case tok_position:
+                      /* Record the sort rule flag, not the token code.  */
+                      rules[nrules] |= sort_position;
+                      break;
+                    }
+
+                  /* Get the next token.  This is either the end of the line,
+                     a comma or a semicolon.  */
+                  now = lr_token (ldfile, charmap, NULL);
+                  if (now->tok == tok_comma || now->tok == tok_semicolon)
+                    {
+                      saw_semicolon = now->tok == tok_semicolon;
+                      now = lr_token (ldfile, charmap, NULL);
+                    }
+                }
+              /* Stop at the end of the line or of the file.  */
+              while (now->tok != tok_eol && now->tok != tok_eof);
+
+            error_sort:
+              /* Turn the index of the last rule into the rule count and
+                 move the rules out of the stack frame.  */
+              ++nrules;
+              collate->nrules = nrules;
+              collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)),
+                                       rules, nrules * sizeof (*rules));
+            }
+
+          /* Now read the rules.  */
+          read_rules (ldfile, collate, charmap, repertoire);
+          break;
+
+        case tok_reorder_after:
+          /* Not handled yet.  */
+          break;
+
+        case tok_reorder_script_after:
+          /* Not handled yet.  */
+          break;
+
+        default:
+        err_label:
+          if (now->tok != tok_eof)
+            SYNTAX_ERROR (_("syntax error in %s locale definition"),
+                          "LC_COLLATE");
+        }
+
+      /* Prepare for the next round.  */
+      now = lr_token (ldfile, charmap, NULL);
+      nowtok = now->tok;
+    }
+
+  /* When we come here we reached the end of the file.  */
+  lr_error (ldfile, _("premature end of file while reading category `%s'"),
+            "LC_COLLATE");
+}
+
+#endif