aboutsummaryrefslogtreecommitdiff
path: root/locale/programs/ld-collate.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale/programs/ld-collate.c')
-rw-r--r--locale/programs/ld-collate.c2610
1 files changed, 388 insertions, 2222 deletions
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index e1c4d14f9e..42fd601064 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -21,8 +21,10 @@
# include <config.h>
#endif
+#include <errno.h>
#include <error.h>
#include <stdlib.h>
+#include <wchar.h>
#include "charmap.h"
#include "localeinfo.h"
@@ -67,6 +69,8 @@ struct element_list_t
/* Data type for collating element. */
struct element_t
{
+ const char *name;
+
const char *mbs;
const uint32_t *wcs;
int order;
@@ -85,6 +89,11 @@ struct element_t
struct element_t *next;
};
+/* Special element values.  These small integers cast to pointers are
+   not the addresses of real elements.  insert_weights stores
+   ELEMENT_ELLIPSIS2 as the only entry of a weight list when the weight
+   for that level was written as an ellipsis. */
+#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
+#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
+#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
+
/* Data type for collating symbol. */
struct symbol_t
{
@@ -124,8 +133,8 @@ struct locale_collate_t
/* This is the cursor for `reorder_after' insertions. */
struct element_t *cursor;
- /* Remember whether last weight was an ellipsis. */
- int was_ellipsis;
+ /* This value is used when handling ellipsis. */
+ struct element_t ellipsis_weight;
/* Known collating elements. */
hash_table elem_table;
@@ -168,14 +177,25 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string,
static struct element_t *
new_element (struct locale_collate_t *collate, const char *mbs,
- size_t len, const uint32_t *wcs)
+ const uint32_t *wcs, const char *name, size_t namelen)
{
struct element_t *newp;
newp = (struct element_t *) obstack_alloc (&collate->mempool,
sizeof (*newp));
- newp->mbs = obstack_copy0 (&collate->mempool, mbs, len);
- newp->wcs = wcs;
+ newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
+ name, namelen);
+ newp->mbs = mbs;
+ if (wcs != NULL)
+ {
+ size_t nwcs = wcslen ((wchar_t *) wcs) + 1;
+ uint32_t zero = 0;
+ obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
+ obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
+ newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
+ }
+ else
+ newp->wcs = NULL;
newp->order = 0;
/* Will be allocated later. */
@@ -437,14 +457,14 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
result = sym->order;
if (result == NULL)
- result = sym->order = new_element (collate, str, len, NULL);
+ result = sym->order = new_element (collate, NULL, NULL, NULL, 0);
}
else if (find_entry (&collate->elem_table, str, len,
(void **) &result) != 0)
{
- /* It's also no collation element. So it is an element defined
- later. */
- result = new_element (collate, str, len, wcstr);
+ /* It's also no collation element. So it is a character
+ element defined later. */
+ result = new_element (collate, NULL, NULL, str, len);
if (result != NULL)
/* Insert it into the sequence table. */
insert_entry (&collate->seq_table, str, len, result);
@@ -456,9 +476,20 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
+/* Remove the element the cursor currently points to from the doubly
+   linked order list and move the cursor to its predecessor (which may
+   be NULL).  The cursor must not be NULL on entry.  */
static void
+unlink_element (struct locale_collate_t *collate)
+{
+  struct element_t *gone = collate->cursor;
+  struct element_t *before = gone->last;
+  struct element_t *after = gone->next;
+
+  /* Let the two neighbours, where they exist, point at each other.  */
+  if (after != NULL)
+    after->last = before;
+  if (before != NULL)
+    before->next = after;
+
+  collate->cursor = before;
+}
+
+
+static void
insert_weights (struct linereader *ldfile, struct element_t *elem,
struct charmap_t *charmap, struct repertoire_t *repertoire,
- struct locale_collate_t *collate)
+ struct locale_collate_t *collate, enum token_t ellipsis)
{
int weight_cnt;
struct token *arg;
@@ -494,7 +525,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
elem->weights[weight_cnt].w = (struct element_t **)
obstack_alloc (&collate->mempool, sizeof (struct element_t *));
elem->weights[weight_cnt].w[0] = NULL;
- elem->weights[weight_cnt].cnt = 0;
+ elem->weights[weight_cnt].cnt = 1;
}
else if (arg->tok == tok_bsymbol)
{
@@ -583,6 +614,24 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
/* We don't need the string anymore. */
free (arg->val.str.startmb);
}
+ else if (ellipsis != tok_none
+ && (arg->tok == tok_ellipsis2
+ || arg->tok == tok_ellipsis3
+ || arg->tok == tok_ellipsis4))
+ {
+ /* It must be the same ellipsis as used in the initial column. */
+ if (arg->tok != ellipsis)
+ lr_error (ldfile, _("\
+%s: weights must use the same ellipsis symbol as the name"),
+ "LC_COLLATE");
+
+ /* The weight for this level is the character itself. We use the
+ special value ELEMENT_ELLIPSIS2 to indicate this. */
+ elem->weights[weight_cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+ elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
+ elem->weights[weight_cnt].cnt = 1;
+ }
else
{
syntax:
@@ -644,14 +693,20 @@ insert_value (struct linereader *ldfile, struct token *arg,
uint32_t wc;
struct element_t *elem = NULL;
- /* First determine the wide character. There must be such a value,
- otherwise we ignore it (if it is no collatio symbol or element). */
- wc = repertoire_find_value (repertoire, arg->val.str.startmb,
- arg->val.str.lenmb);
-
/* Try to find the character in the charmap. */
seq = charmap_find_value (charmap, arg->val.str.startmb, arg->val.str.lenmb);
+ /* Determine the wide character. */
+ if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ {
+ wc = repertoire_find_value (repertoire, arg->val.str.startmb,
+ arg->val.str.lenmb);
+ if (seq != NULL)
+ seq->ucs4 = wc;
+ }
+ else
+ wc = seq->ucs4;
+
if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
{
/* It's no character, so look through the collation elements and
@@ -666,9 +721,7 @@ insert_value (struct linereader *ldfile, struct token *arg,
elem = sym->order;
if (elem == NULL)
- elem = sym->order = new_element (collate, arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc);
+ elem = sym->order = new_element (collate, NULL, NULL, NULL, 0);
}
else if (find_entry (&collate->elem_table, arg->val.str.startmb,
arg->val.str.lenmb, (void **) &elem) != 0)
@@ -684,16 +737,17 @@ insert_value (struct linereader *ldfile, struct token *arg,
if (find_entry (&collate->seq_table, arg->val.str.startmb,
arg->val.str.lenmb, (void **) &elem) != 0)
{
+ uint32_t wcs[2] = { wc, 0 };
+
/* We have to allocate an entry. */
- elem = new_element (collate, arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc);
+ elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
+ wcs, arg->val.str.startmb, arg->val.str.lenmb);
/* And add it to the table. */
if (insert_entry (&collate->seq_table, arg->val.str.startmb,
arg->val.str.lenmb, elem) != 0)
/* This cannot happen. */
- abort ();
+ assert (! "Internal error");
}
}
@@ -708,7 +762,229 @@ insert_value (struct linereader *ldfile, struct token *arg,
return;
}
- insert_weights (ldfile, elem, charmap, repertoire, collate);
+ insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
+}
+
+
+/* Expand the ellipsis line that precedes ARG in the `order_start'
+   section into individual collation entries.
+
+   LDFILE is used for diagnostics and source positions.  ARG names the
+   end of the range; it is passed on to insert_value and may be NULL
+   when there is no end symbol.  ELLIPSIS is the ellipsis token that
+   was seen: tok_ellipsis2 selects hexadecimal names, tok_ellipsis3 is
+   the absolute ellipsis (not implemented yet, nothing is generated),
+   and any other value selects decimal names.  CHARMAP and REPERTOIRE
+   are searched for the generated character names.  COLLATE holds the
+   order list, the cursor and the `ellipsis_weight' template.
+
+   For every value strictly between the numeric suffixes of the start
+   and the end name an element is looked up or created, linked in
+   after the cursor, and given the weights of `ellipsis_weight', where
+   an ELEMENT_ELLIPSIS2 placeholder is replaced by the element
+   itself.  */
+static void
+handle_ellipsis (struct linereader *ldfile, struct token *arg,
+                 enum token_t ellipsis, struct charmap_t *charmap,
+                 struct repertoire_t *repertoire,
+                 struct locale_collate_t *collate)
+{
+  struct element_t *startp;
+  struct element_t *endp;
+
+  /* Unlink the entry added for the ellipsis.  */
+  unlink_element (collate);
+  startp = collate->cursor;
+
+  /* Process and add the end-entry.  */
+  if (arg != NULL)
+    insert_value (ldfile, arg, charmap, repertoire, collate);
+
+  /* Reset the cursor.  */
+  collate->cursor = startp;
+
+  /* Now we have to handle many different situations:
+     - we have to distinguish between the three different ellipsis forms
+     - is the ellipsis at the beginning, in the middle, or at the end.
+
+     The cursor is NULL when nothing precedes the ellipsis, so it must
+     not be dereferenced unconditionally.  */
+  endp = startp != NULL ? startp->next : NULL;
+  assert (arg == NULL || startp == NULL || endp != NULL);
+
+  /* Both, the start and the end symbol, must stand for characters.
+     Elements without a name were created for something else.  A
+     missing start or end is diagnosed separately below.  */
+  if ((startp != NULL && startp->name == NULL)
+      || (endp != NULL && endp->name == NULL))
+    {
+      lr_error (ldfile, _("\
+%s: the start end the end symbol of a range must stand for characters"),
+                "LC_COLLATE");
+      return;
+    }
+
+  if (ellipsis == tok_ellipsis3)
+    {
+      /* XXX The absolute ellipsis is not handled yet.  */
+    }
+  else
+    {
+      /* For symbolic range we naturally must have a beginning and an
+         end specified by the user.  */
+      if (startp == NULL)
+        lr_error (ldfile, _("\
+%s: symbolic range ellipsis must not directly follow `order_start'"),
+                  "LC_COLLATE");
+      else if (endp == NULL)
+        lr_error (ldfile, _("\
+%s: symbolic range ellipsis must not be direct followed by `order_end'"),
+                  "LC_COLLATE");
+      else
+        {
+          /* Determine the range.  To do so we have to determine the
+             common prefix of both names and then the numeric values
+             of both ends.
+
+             NOTE(review): strlen needs NUL-terminated names, but
+             new_element copies exactly NAMELEN bytes with
+             obstack_copy -- confirm the terminator is present.  */
+          size_t lenfrom = strlen (startp->name);
+          size_t lento = strlen (endp->name);
+          char buf[lento + 1];
+          int preflen = 0;
+          long int from;
+          long int to;
+          char *cp;
+          int base = ellipsis == tok_ellipsis2 ? 16 : 10;
+
+          if (lenfrom != lento)
+            {
+            invalid_range:
+              /* The precision of `%.*s' must be an int.  */
+              lr_error (ldfile, _("\
+`%s' and `%.*s' are no valid names for symbolic range"),
+                        startp->name, (int) lento, endp->name);
+              return;
+            }
+
+          while (startp->name[preflen] == endp->name[preflen])
+            if (startp->name[preflen] == '\0')
+              /* Nothing to be done.  The start and end point are
+                 identical and while inserting the end point we have
+                 already given the user an error message.  */
+              return;
+            else
+              ++preflen;
+
+          /* strtol reports overflow only through ERANGE (it returns
+             LONG_MAX or LONG_MIN then), so test errno alone.  */
+          errno = 0;
+          from = strtol (startp->name + preflen, &cp, base);
+          if (errno == ERANGE || *cp != '\0')
+            goto invalid_range;
+
+          errno = 0;
+          to = strtol (endp->name + preflen, &cp, base);
+          if (errno == ERANGE || *cp != '\0')
+            goto invalid_range;
+
+          /* Copy the prefix.  */
+          memcpy (buf, startp->name, preflen);
+
+          /* Loop over all values strictly between the two ends.  */
+          for (++from; from < to; ++from)
+            {
+              struct element_t *elem = NULL;
+              struct charseq *seq;
+              uint32_t wc;
+              int cnt;
+
+              /* Generate the name.  Pad with zeros to the width of
+                 the end points so that every generated name has the
+                 same length as they do; hexadecimal digits are
+                 written in upper case.  */
+              snprintf (buf + preflen, lento + 1 - preflen,
+                        base == 10 ? "%0*ld" : "%0*lX",
+                        (int) (lenfrom - preflen), from);
+
+              /* Look whether this (generated) name is already
+                 defined.  */
+              if (find_entry (&collate->seq_table, buf, lenfrom,
+                              (void **) &elem) == 0)
+                {
+                  if (elem->next != NULL || (collate->cursor != NULL
+                                             && elem->next == collate->cursor))
+                    {
+                      lr_error (ldfile, _("\
+%s: order for `%.*s' already defined at %s:%Z"),
+                                "LC_COLLATE", (int) lenfrom, buf,
+                                elem->file, elem->line);
+                      continue;
+                    }
+
+                  if (elem->name == NULL)
+                    {
+                      lr_error (ldfile, _("%s: `%s' must be a charater"),
+                                "LC_COLLATE", buf);
+                      continue;
+                    }
+                }
+
+              if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
+                {
+                  /* Search for a character of this name.  */
+                  seq = charmap_find_value (charmap, buf, lenfrom);
+                  if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+                    {
+                      wc = repertoire_find_value (repertoire, buf, lenfrom);
+
+                      /* Cache the result in the charmap entry.  */
+                      if (seq != NULL)
+                        seq->ucs4 = wc;
+                    }
+                  else
+                    wc = seq->ucs4;
+
+                  if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
+                    /* We don't know anything about a character with this
+                       name.  XXX Should we warn?  */
+                    continue;
+
+                  if (elem == NULL)
+                    {
+                      uint32_t wcs[2] = { wc, 0 };
+
+                      /* We have to allocate an entry.  */
+                      elem = new_element (collate,
+                                          seq != NULL ? seq->bytes : NULL,
+                                          wc == ILLEGAL_CHAR_VALUE
+                                          ? NULL : wcs,
+                                          buf, lenfrom);
+                    }
+                  else
+                    {
+                      /* Update the element.  */
+                      if (seq != NULL)
+                        elem->mbs = obstack_copy0 (&collate->mempool,
+                                                   seq->bytes, seq->nbytes);
+
+                      if (wc != ILLEGAL_CHAR_VALUE)
+                        {
+                          uint32_t zero = 0;
+
+                          obstack_grow (&collate->mempool,
+                                        &wc, sizeof (uint32_t));
+                          obstack_grow (&collate->mempool,
+                                        &zero, sizeof (uint32_t));
+                          elem->wcs = obstack_finish (&collate->mempool);
+                        }
+                    }
+
+                  elem->file = ldfile->fname;
+                  elem->line = ldfile->lineno;
+                }
+
+              /* Enqueue the new element directly after the cursor and
+                 advance the cursor to it.  */
+              elem->last = collate->cursor;
+              elem->next = collate->cursor->next;
+              elem->last->next = elem;
+              if (elem->next != NULL)
+                elem->next->last = elem;
+              collate->cursor = elem;
+
+              /* Now add the weights.  They come from the `ellipsis_weight'
+                 member of `collate'.  */
+              elem->weights = (struct element_list_t *)
+                obstack_alloc (&collate->mempool,
+                               nrules * sizeof (struct element_list_t));
+              for (cnt = 0; cnt < nrules; ++cnt)
+                if (collate->ellipsis_weight.weights[cnt].cnt == 1
+                    && (collate->ellipsis_weight.weights[cnt].w[0]
+                        == ELEMENT_ELLIPSIS2))
+                  {
+                    /* The placeholder stands for the element itself.  */
+                    elem->weights[cnt].w = (struct element_t **)
+                      obstack_alloc (&collate->mempool,
+                                     sizeof (struct element_t *));
+                    elem->weights[cnt].w[0] = elem;
+                    elem->weights[cnt].cnt = 1;
+                  }
+                else
+                  {
+                    /* Simply use the weight from `ellipsis_weight'.  */
+                    elem->weights[cnt].w =
+                      collate->ellipsis_weight.weights[cnt].w;
+                    elem->weights[cnt].cnt =
+                      collate->ellipsis_weight.weights[cnt].cnt;
+                  }
+            }
+        }
+    }
}
@@ -768,7 +1044,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
struct token *arg = NULL;
enum token_t nowtok;
int state = 0;
- int was_ellipsis = 0;
+ enum token_t was_ellipsis = tok_none;
struct localedef_t *copy_locale = NULL;
/* Get the repertoire we have to use. */
@@ -981,10 +1257,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
if (insert_entry (&collate->elem_table,
symbol, symbol_len,
new_element (collate,
- arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc))
- < 0)
+ NULL, NULL, NULL, 0)) < 0)
lr_error (ldfile, _("\
error while adding collating element"));
}
@@ -1244,6 +1517,12 @@ error while adding equivalent collating symbol"));
if (state != 1)
goto err_label;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+
state = 2;
lr_ignore_rest (ldfile, 1);
break;
@@ -1257,7 +1536,18 @@ error while adding equivalent collating symbol"));
break;
}
- if (state != 2 && state != 3)
+ if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+ state = 2;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+ }
+ else if (state != 2 && state != 3)
goto err_label;
state = 3;
@@ -1313,7 +1603,24 @@ error while adding equivalent collating symbol"));
break;
}
- if (state != 2 && state != 4)
+ if (state == 1)
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+ state = 2;
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+ }
+ else if (state == 3)
+ {
+ error (0, 0, _("%s: missing `reorder-end' keyword"),
+ "LC_COLLATE");
+ state = 4;
+ }
+ else if (state != 2 && state != 4)
goto err_label;
state = 5;
@@ -1490,6 +1797,23 @@ error while adding equivalent collating symbol"));
read_directions (ldfile, arg, charmap, repertoire,
collate);
}
+ break;
+ }
+ else if (was_ellipsis != tok_none)
+ {
+ /* Using the information in the `ellipsis_weight'
+ element and this and the last value we have to handle
+ the ellipsis now. */
+ assert (state == 1);
+
+ handle_ellipsis (ldfile, arg, was_ellipsis, charmap, repertoire,
+ collate);
+
+ /* Remember that we processed the ellipsis. */
+ was_ellipsis = tok_none;
+
+ /* And don't add the value a second time. */
+ break;
}
/* Now insert in the new place. */
@@ -1508,37 +1832,48 @@ error while adding equivalent collating symbol"));
if (state != 1)
goto err_label;
+ if (was_ellipsis != tok_none)
+ {
+ lr_error (ldfile,
+ _("%s: cannot have `%s' as end of ellipsis range"),
+ "LC_COLLATE", "UNDEFINED");
+
+ unlink_element (collate);
+ was_ellipsis = tok_none;
+ }
+
/* See whether UNDEFINED already appeared somewhere. */
if (collate->undefined.next != NULL
|| (collate->cursor != NULL
&& collate->undefined.next == collate->cursor))
{
- lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
- 9, "UNDEFINED", collate->undefined.file,
+ lr_error (ldfile,
+ _("%s: order for `%.*s' already defined at %s:%Z"),
+ "LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
collate->undefined.line);
lr_ignore_rest (ldfile, 0);
}
else
/* Parse the weights. */
insert_weights (ldfile, &collate->undefined, charmap,
- repertoire, collate);
+ repertoire, collate, tok_none);
break;
+ case tok_ellipsis2:
case tok_ellipsis3:
- /* Ignore the rest of the line if we don't need the input of
- this line. */
- if (ignore_content)
- {
- lr_ignore_rest (ldfile, 0);
- break;
- }
+ case tok_ellipsis4:
+ /* This is the symbolic (decimal or hexadecimal) or absolute
+ ellipsis. */
+ if (was_ellipsis != tok_none)
+ goto err_label;
if (state != 1 && state != 3)
goto err_label;
- was_ellipsis = 1;
- /* XXX Read the remainder of the line and remember what are
- the weights. */
+ was_ellipsis = nowtok;
+
+ insert_weights (ldfile, &collate->ellipsis_weight, charmap,
+ repertoire, collate, nowtok);
break;
case tok_end:
@@ -1552,8 +1887,15 @@ error while adding equivalent collating symbol"));
_("%s: empty category description not allowed"),
"LC_COLLATE");
else if (state == 1)
- lr_error (ldfile, _("%s: missing `order_end' keyword"),
- "LC_COLLATE");
+ {
+ lr_error (ldfile, _("%s: missing `order_end' keyword"),
+ "LC_COLLATE");
+
+ /* Handle ellipsis at end of list. */
+ if (was_ellipsis)
+ /* XXX */
+ abort ();
+ }
else if (state == 3)
error (0, 0, _("%s: missing `reorder-end' keyword"),
"LC_COLLATE");
@@ -1585,2179 +1927,3 @@ error while adding equivalent collating symbol"));
/* When we come here we reached the end of the file. */
lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
}
-
-
-#if 0
-
-/* What kind of symbols get defined? */
-enum coll_symbol
-{
- undefined,
- ellipsis,
- character,
- element,
- symbol
-};
-
-
-typedef struct patch_t
-{
- const char *fname;
- size_t lineno;
- const char *token;
- union
- {
- unsigned int *pos;
- size_t idx;
- } where;
- struct patch_t *next;
-} patch_t;
-
-
-typedef struct element_t
-{
- const char *namemb;
- const uint32_t *namewc;
- unsigned int this_weight;
-
- struct element_t *next;
-
- unsigned int *ordering;
- size_t ordering_len;
-} element_t;
-
-
-/* The real definition of the struct for the LC_COLLATE locale. */
-struct locale_collate_t
-{
- /* Collate symbol table. Simple mapping to number. */
- hash_table symbols;
-
- /* The collation elements. */
- hash_table elements;
- struct obstack element_mem;
-
- /* The result tables. */
- hash_table resultmb;
- hash_table resultwc;
-
- /* Sorting rules given in order_start line. */
- uint32_t nrules;
- enum coll_sort_rule *rules;
-
- /* Used while recognizing symbol composed of multiple tokens
- (collating-element). */
- const char *combine_token;
- size_t combine_token_len;
-
- /* How many sorting order specifications so far. */
- unsigned int order_cnt;
-
- /* Was lastline ellipsis? */
- int was_ellipsis;
- /* Value of last entry if was character. */
- uint32_t last_char;
- /* Current element. */
- element_t *current_element;
- /* What kind of symbol is current element. */
- enum coll_symbol kind;
-
- /* Patch lists. */
- patch_t *current_patch;
- patch_t *all_patches;
-
- /* Room for the UNDEFINED information. */
- element_t undefined;
- unsigned int undefined_len;
-
- /* Script information. */
- const char **scripts;
- unsigned int nscripts;
-};
-
-
-/* Be verbose? Defined in localedef.c. */
-extern int verbose;
-
-
-
-#define obstack_chunk_alloc malloc
-#define obstack_chunk_free free
-
-
-/* Prototypes for local functions. */
-static void collate_startup (struct linereader *ldfile,
- struct localedef_t *locale,
- struct charmap_t *charmap, int ignore_content);
-
-
-static void
-collate_startup (struct linereader *ldfile, struct localedef_t *locale,
- struct charmap_t *charset, int ignore_content)
-{
- struct locale_collate_t *collate;
-
- /* Allocate the needed room. */
- locale->categories[LC_COLLATE].collate = collate =
- (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
-
- /* Allocate hash table for collating elements. */
- if (init_hash (&collate->elements, 512))
- error (4, 0, _("memory exhausted"));
- collate->combine_token = NULL;
- obstack_init (&collate->element_mem);
-
- /* Allocate hash table for collating elements. */
- if (init_hash (&collate->symbols, 64))
- error (4, 0, _("memory exhausted"));
-
- /* Allocate hash table for result. */
- if (init_hash (&collate->result, 512))
- error (4, 0, _("memory exhausted"));
-
- collate->nrules = 0;
- collate->nrules_max = 10;
- collate->rules
- = (enum coll_sort_rule *) xmalloc (collate->nrules_max
- * sizeof (enum coll_sort_rule));
-
- collate->order_cnt = 1; /* The smallest weight is 2. */
-
- collate->was_ellipsis = 0;
- collate->last_char = L'\0'; /* 0 because leading ellipsis is allowed. */
-
- collate->all_patches = NULL;
-
- /* This tells us no UNDEFINED entry was found until now. */
- memset (&collate->undefined, '\0', sizeof (collate->undefined));
-
- ldfile->translate_strings = 0;
- ldfile->return_widestr = 0;
-}
-
-
-void
-collate_finish (struct localedef_t *locale, struct charset_t *charset,
- struct repertoire_t *repertoire)
-{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- patch_t *patch;
- size_t cnt;
-
- /* Patch the constructed table so that forward references are
- correctly filled. */
- for (patch = collate->all_patches; patch != NULL; patch = patch->next)
- {
- uint32_t wch;
- size_t toklen = strlen (patch->token);
- void *ptmp;
- unsigned int value = 0;
-
- wch = charset_find_value (&charset->char_table, patch->token, toklen);
- if (wch != ILLEGAL_CHAR_VALUE)
- {
- element_t *runp;
-
- if (find_entry (&collate->result, &wch, sizeof (uint32_t),
- (void *) &runp) < 0)
- runp = NULL;
- for (; runp != NULL; runp = runp->next)
- if (runp->name[0] == wch && runp->name[1] == L'\0')
- break;
-
- value = runp == NULL ? 0 : runp->this_weight;
- }
- else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
- >= 0)
- {
- value = ((element_t *) ptmp)->this_weight;
- }
- else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
- >= 0)
- {
- value = (unsigned long int) ptmp;
- }
- else
- value = 0;
-
- if (value == 0)
- {
- if (!be_quiet)
- error_at_line (0, 0, patch->fname, patch->lineno,
- _("no weight defined for symbol `%s'"),
- patch->token);
- }
- else
- *patch->where.pos = value;
- }
-
- /* If no definition for UNDEFINED is given, all characters in the
- given charset must be specified. */
- if (collate->undefined.ordering == NULL)
- {
- /**************************************************************\
- |* XXX We should test whether really an unspecified character *|
- |* exists before giving the message. *|
- \**************************************************************/
- uint32_t weight;
-
- if (!be_quiet)
- error (0, 0, _("no definition of `UNDEFINED'"));
-
- collate->undefined.ordering_len = collate->nrules;
- weight = ++collate->order_cnt;
-
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- {
- uint32_t one = 1;
- obstack_grow (&collate->element_mem, &one, sizeof (one));
- }
-
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- obstack_grow (&collate->element_mem, &weight, sizeof (weight));
-
- collate->undefined.ordering = obstack_finish (&collate->element_mem);
- }
-
- collate->undefined_len = 2; /* For the name: 1 x uint32_t + L'\0'. */
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- collate->undefined_len += 1 + collate->undefined.ordering[cnt];
-}
-
-
-
-void
-collate_output (struct localedef_t *locale, struct charset_t *charset,
- struct repertoire_t *repertoire, const char *output_path)
-{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- uint32_t table_size, table_best, level_best, sum_best;
- void *last;
- element_t *pelem;
- uint32_t *name;
- size_t len;
- const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
- struct iovec iov[2 + nelems];
- struct locale_file data;
- uint32_t idx[nelems];
- struct obstack non_simple;
- struct obstack string_pool;
- size_t cnt, entry_size;
- uint32_t undefined_offset = UINT_MAX;
- uint32_t *table, *extra, *table2, *extra2;
- size_t extra_len;
- uint32_t element_hash_tab_size;
- uint32_t *element_hash_tab;
- uint32_t *element_hash_tab_ob;
- uint32_t element_string_pool_size;
- char *element_string_pool;
- uint32_t element_value_size;
- uint32_t *element_value;
- uint32_t *element_value_ob;
- uint32_t symbols_hash_tab_size;
- uint32_t *symbols_hash_tab;
- uint32_t *symbols_hash_tab_ob;
- uint32_t symbols_string_pool_size;
- char *symbols_string_pool;
- uint32_t symbols_class_size;
- uint32_t *symbols_class;
- uint32_t *symbols_class_ob;
- hash_table *hash_tab;
- unsigned int dummy_weights[collate->nrules + 1];
-
- sum_best = UINT_MAX;
- table_best = 0xffff;
- level_best = 0xffff;
-
- /* Compute table size. */
- if (!be_quiet)
- fputs (_("\
-Computing table size for collation information might take a while..."),
- stderr);
- for (table_size = 256; table_size < sum_best; ++table_size)
- {
- size_t hits[table_size];
- unsigned int worst = 1;
- size_t cnt;
-
- last = NULL;
-
- for (cnt = 0; cnt < 256; ++cnt)
- hits[cnt] = 1;
- memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
-
- while (iterate_table (&collate->result, &last, (const void **) &name,
- &len, (void **) &pelem) >= 0)
- if (pelem->ordering != NULL && pelem->name[0] > 0xff)
- if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
- {
- worst = hits[(unsigned int) pelem->name[0] % table_size];
- if (table_size * worst > sum_best)
- break;
- }
-
- if (table_size * worst < sum_best)
- {
- sum_best = table_size * worst;
- table_best = table_size;
- level_best = worst;
- }
- }
- assert (table_best != 0xffff || level_best != 0xffff);
- if (!be_quiet)
- fputs (_(" done\n"), stderr);
-
- obstack_init (&non_simple);
- obstack_init (&string_pool);
-
- data.magic = LIMAGIC (LC_COLLATE);
- data.n = nelems;
- iov[0].iov_base = (void *) &data;
- iov[0].iov_len = sizeof (data);
-
- iov[1].iov_base = (void *) idx;
- iov[1].iov_len = sizeof (idx);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (uint32_t);
-
- table = (uint32_t *) alloca (collate->nrules * sizeof (uint32_t));
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
- = collate->nrules * sizeof (uint32_t);
- /* Another trick here. Describing the collation method needs only a
- few bits (3, to be exact). But the binary file should be
- accessible by machines with both endianesses and so we store both
- forms in the same word. */
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- table[cnt] = collate->rules[cnt] | bswap_32 (collate->rules[cnt]);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (uint32_t);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
- = sizeof (uint32_t);
-
- entry_size = 1 + MAX (collate->nrules, 2);
-
- table = (uint32_t *) alloca (table_best * level_best * entry_size
- * sizeof (table[0]));
- memset (table, '\0', table_best * level_best * entry_size
- * sizeof (table[0]));
-
-
- /* Macros for inserting in output table. */
-#define ADD_VALUE(expr) \
- do { \
- uint32_t to_write = (uint32_t) expr; \
- obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
- } while (0)
-
-#define ADD_ELEMENT(pelem, len) \
- do { \
- size_t cnt, idx; \
- \
- ADD_VALUE (len); \
- \
- wlen = wcslen (pelem->name); \
- obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (uint32_t)); \
- \
- idx = collate->nrules; \
- for (cnt = 0; cnt < collate->nrules; ++cnt) \
- { \
- size_t disp; \
- \
- ADD_VALUE (pelem->ordering[cnt]); \
- for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
- ADD_VALUE (pelem->ordering[idx++]); \
- } \
- } while (0)
-
-#define ADD_FORWARD(pelem) \
- do { \
- /* We leave a reference in the main table and put all \
- information in the table for the extended entries. */ \
- element_t *runp; \
- element_t *has_simple = NULL; \
- size_t wlen; \
- \
- table[(level * table_best + slot) * entry_size + 1] \
- = FORWARD_CHAR; \
- table[(level * table_best + slot) * entry_size + 2] \
- = obstack_object_size (&non_simple) / sizeof (uint32_t); \
- \
- /* Here we have to construct the non-simple table entry. First \
- compute the total length of this entry. */ \
- for (runp = (pelem); runp != NULL; runp = runp->next) \
- if (runp->ordering != NULL) \
- { \
- uint32_t value; \
- size_t cnt; \
- \
- value = 1 + wcslen (runp->name) + 1; \
- \
- for (cnt = 0; cnt < collate->nrules; ++cnt) \
- /* We have to take care for entries without ordering \
- information. While reading them they get inserted in the \
- table and later not removed when something goes wrong with \
- reading its weights. */ \
- value += 1 + runp->ordering[cnt]; \
- \
- if (runp->name[1] == L'\0') \
- has_simple = runp; \
- \
- ADD_ELEMENT (runp, value); \
- } \
- \
- if (has_simple == NULL) \
- { \
- size_t idx, cnt; \
- \
- ADD_VALUE (collate->undefined_len + 1); \
- \
- /* Add the name. */ \
- ADD_VALUE ((pelem)->name[0]); \
- ADD_VALUE (0); \
- \
- idx = collate->nrules; \
- for (cnt = 0; cnt < collate->nrules; ++cnt) \
- { \
- size_t disp; \
- \
- ADD_VALUE (collate->undefined.ordering[cnt]); \
- for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
- { \
- if ((uint32_t) collate->undefined.ordering[idx] \
- == ELLIPSIS_CHAR) \
- ADD_VALUE ((pelem)->name[0]); \
- else \
- ADD_VALUE (collate->undefined.ordering[idx++]); \
- ++idx; \
- } \
- } \
- } \
- } while (0)
-
-
-
- /* Fill the table now. First we look for all the characters which
- fit into one single byte. This speeds up the 8-bit string
- functions. */
- last = NULL;
- while (iterate_table (&collate->result, &last, (const void **) &name,
- &len, (void **) &pelem) >= 0)
- if (pelem->name[0] <= 0xff)
- {
- /* We have a single byte name. Now we must distinguish
- between entries in simple form (i.e., only one value per
- weight and no collation element starting with the same
- character) and those which are not. */
- size_t slot = ((size_t) pelem->name[0]);
- const size_t level = 0;
-
- table[slot * entry_size] = pelem->name[0];
-
- if (pelem->name[1] == L'\0' && pelem->next == NULL
- && pelem->ordering_len == collate->nrules)
- {
- /* Yes, we have a simple one. Lucky us. */
- size_t cnt;
-
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- table[slot * entry_size + 1 + cnt]
- = pelem->ordering[collate->nrules + cnt];
- }
- else
- ADD_FORWARD (pelem);
- }
-
- /* Now check for missing single byte entries. If one exist we fill
- with the UNDEFINED entry. */
- for (cnt = 0; cnt < 256; ++cnt)
- /* The first weight is never 0 for existing entries. */
- if (table[cnt * entry_size + 1] == 0)
- {
- /* We have to fill in the information from the UNDEFINED
- entry. */
- table[cnt * entry_size] = (uint32_t) cnt;
-
- if (collate->undefined.ordering_len == collate->nrules)
- {
- size_t inner;
-
- for (inner = 0; inner < collate->nrules; ++inner)
- if ((uint32_t)collate->undefined.ordering[collate->nrules
- + inner]
- == ELLIPSIS_CHAR)
- table[cnt * entry_size + 1 + inner] = cnt;
- else
- table[cnt * entry_size + 1 + inner]
- = collate->undefined.ordering[collate->nrules + inner];
- }
- else
- {
- if (undefined_offset != UINT_MAX)
- {
- table[cnt * entry_size + 1] = FORWARD_CHAR;
- table[cnt * entry_size + 2] = undefined_offset;
- }
- else
- {
- const size_t slot = cnt;
- const size_t level = 0;
-
- ADD_FORWARD (&collate->undefined);
- undefined_offset = table[cnt * entry_size + 2];
- }
- }
- }
-
- /* Now we are ready for inserting the whole rest. */
- last = NULL;
- while (iterate_table (&collate->result, &last, (const void **) &name,
- &len, (void **) &pelem) >= 0)
- if (pelem->name[0] > 0xff)
- {
- /* Find the position. */
- size_t slot = ((size_t) pelem->name[0]) % table_best;
- size_t level = 0;
-
- while (table[(level * table_best + slot) * entry_size + 1] != 0)
- ++level;
- assert (level < level_best);
-
- if (pelem->name[1] == L'\0' && pelem->next == NULL
- && pelem->ordering_len == collate->nrules)
- {
- /* Again a simple entry. */
- size_t inner;
-
- for (inner = 0; inner < collate->nrules; ++inner)
- table[(level * table_best + slot) * entry_size + 1 + inner]
- = pelem->ordering[collate->nrules + inner];
- }
- else
- ADD_FORWARD (pelem);
- }
-
- /* Add the UNDEFINED entry. */
- {
- /* Here we have to construct the non-simple table entry. */
- size_t idx, cnt;
-
- undefined_offset = obstack_object_size (&non_simple);
-
- idx = collate->nrules;
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- {
- size_t disp;
-
- ADD_VALUE (collate->undefined.ordering[cnt]);
- for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
- ADD_VALUE (collate->undefined.ordering[idx++]);
- }
- }
-
- /* Finish the extra block. */
- extra_len = obstack_object_size (&non_simple);
- extra = (uint32_t *) obstack_finish (&non_simple);
- assert ((extra_len % sizeof (uint32_t)) == 0);
-
- /* Now we have to build the two array for the other byte ordering. */
- table2 = (uint32_t *) alloca (table_best * level_best * entry_size
- * sizeof (table[0]));
- extra2 = (uint32_t *) alloca (extra_len);
-
- for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
- table2[cnt] = bswap_32 (table[cnt]);
-
- for (cnt = 0; cnt < extra_len / sizeof (uint32_t); ++cnt)
- extra2[cnt] = bswap_32 (extra2[cnt]);
-
- /* We need a simple hashing table to get a collation-element->chars
- mapping. We again use internal hashing using a secondary hashing
- function.
-
- Each string has an associate hashing value V, computed by a
- fixed function. To locate the string we use open addressing with
- double hashing. The first index will be V % M, where M is the
- size of the hashing table. If no entry is found, iterating with
- a second, independent hashing function takes place. This second
- value will be 1 + V % (M - 2). The approximate number of probes
- will be
-
- for unsuccessful search: (1 - N / M) ^ -1
- for successful search: - (N / M) ^ -1 * ln (1 - N / M)
-
- where N is the number of keys.
-
- If we now choose M to be the next prime bigger than 4 / 3 * N,
- we get the values 4 and 1.85 resp. Because unsuccessful searches
- are unlikely this is a good value. Formulas: [Knuth, The Art of
- Computer Programming, Volume 3, Sorting and Searching, 1973,
- Addison Wesley] */
- if (collate->elements.filled == 0)
- {
- /* We don't need any element table since there are no collating
- elements. */
- element_hash_tab_size = 0;
- element_hash_tab = NULL;
- element_hash_tab_ob = NULL;
- element_string_pool_size = 0;
- element_string_pool = NULL;
- element_value_size = 0;
- element_value = NULL;
- element_value_ob = NULL;
- }
- else
- {
- void *ptr; /* Running pointer. */
- const char *key; /* Key for current bucket. */
- size_t keylen; /* Length of key data. */
- const element_t *data; /* Data, i.e., the character sequence. */
-
- element_hash_tab_size = next_prime ((collate->elements.filled * 4) / 3);
- if (element_hash_tab_size < 7)
- /* We need a minimum to make the following code work. */
- element_hash_tab_size = 7;
-
- element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size
- * sizeof (uint32_t)));
- memset (element_hash_tab, '\377', (2 * element_hash_tab_size
- * sizeof (uint32_t)));
-
- ptr = NULL;
- while (iterate_table (&collate->elements, &ptr, (const void **) &key,
- &keylen, (void **) &data) == 0)
- {
- size_t hash_val = hash_string (key, keylen);
- size_t idx = hash_val % element_hash_tab_size;
-
- if (element_hash_tab[2 * idx] != (~((uint32_t) 0)))
- {
- /* We need the second hashing function. */
- size_t c = 1 + (hash_val % (element_hash_tab_size - 2));
-
- do
- if (idx >= element_hash_tab_size - c)
- idx -= element_hash_tab_size - c;
- else
- idx += c;
- while (element_hash_tab[2 * idx] != (~((uint32_t) 0)));
- }
-
- element_hash_tab[2 * idx] = obstack_object_size (&non_simple);
- element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool)
- / sizeof (uint32_t));
-
- obstack_grow0 (&non_simple, key, keylen);
- obstack_grow (&string_pool, data->name,
- (wcslen (data->name) + 1) * sizeof (uint32_t));
- }
-
- if (obstack_object_size (&non_simple) % 4 != 0)
- obstack_blank (&non_simple,
- 4 - (obstack_object_size (&non_simple) % 4));
- element_string_pool_size = obstack_object_size (&non_simple);
- element_string_pool = obstack_finish (&non_simple);
-
- element_value_size = obstack_object_size (&string_pool);
- element_value = obstack_finish (&string_pool);
-
- /* Create the tables for the other byte order. */
- element_hash_tab_ob = obstack_alloc (&non_simple,
- (2 * element_hash_tab_size
- * sizeof (uint32_t)));
- for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt)
- element_hash_tab_ob[cnt] = bswap_U32 (element_hash_tab[cnt]);
-
- element_value_ob = obstack_alloc (&string_pool, element_value_size);
- for (cnt = 0; cnt < element_value_size / 4; ++cnt)
- element_value_ob[cnt] = bswap_32 (element_value[cnt]);
- }
-
- /* Store collation elements as map to collation class. There are
- three kinds of symbols:
- - simple characters
- - collation elements
- - collation symbols
- We need to make a table which lets the user to access the primary
- weight based on the symbol string. */
- symbols_hash_tab_size = next_prime ((4 * (charset->char_table.filled
- + collate->elements.filled
- + collate->symbols.filled)) / 3);
- symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
- * sizeof (uint32_t)));
- memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size
- * sizeof (uint32_t)));
-
- /* Now fill the array. First the symbols from the character set,
- then the collation elements and last the collation symbols. */
- hash_tab = &charset->char_table;
- while (1)
- {
- void *ptr; /* Running pointer. */
- const char *key; /* Key for current bucket. */
- size_t keylen; /* Length of key data. */
- void *data; /* Data. */
-
- ptr = NULL;
- while (iterate_table (hash_tab, &ptr, (const void **) &key,
- &keylen, (void **) &data) == 0)
- {
- size_t hash_val;
- size_t idx;
- uint32_t word;
- unsigned int *weights;
-
- if (hash_tab == &charset->char_table
- || hash_tab == &collate->elements)
- {
- element_t *lastp, *firstp;
- uint32_t dummy_name[2];
- const uint32_t *name;
- size_t name_len;
-
- if (hash_tab == &charset->char_table)
- {
- dummy_name[0] = (uint32_t) ((unsigned long int) data);
- dummy_name[1] = L'\0';
- name = dummy_name;
- name_len = sizeof (uint32_t);
- }
- else
- {
- element_t *elemp = (element_t *) data;
- name = elemp->name;
- name_len = wcslen (name) * sizeof (uint32_t);
- }
-
- /* First check whether this character is used at all. */
- if (find_entry (&collate->result, name, name_len,
- (void *) &firstp) < 0)
- /* The symbol is not directly mentioned in the collation.
- I.e., we use the value for UNDEFINED. */
- lastp = &collate->undefined;
- else
- {
- /* The entry for the simple character is always found at
- the end. */
- lastp = firstp;
- while (lastp->next != NULL && wcscmp (name, lastp->name))
- lastp = lastp->next;
- }
-
- weights = lastp->ordering;
- }
- else
- {
- dummy_weights[0] = 1;
- dummy_weights[collate->nrules]
- = (unsigned int) ((unsigned long int) data);
-
- weights = dummy_weights;
- }
-
- /* In LASTP->ordering we now have the collation class.
- Determine the place in the hashing table next. */
- hash_val = hash_string (key, keylen);
- idx = hash_val % symbols_hash_tab_size;
-
- if (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)))
- {
- /* We need the second hashing function. */
- size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2));
-
- do
- if (idx >= symbols_hash_tab_size - c)
- idx -= symbols_hash_tab_size - c;
- else
- idx += c;
- while (symbols_hash_tab[2 * idx] != (~((uint32_t) 0)));
- }
-
- symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool);
- symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple)
- / sizeof (uint32_t));
-
- obstack_grow0 (&string_pool, key, keylen);
- /* Adding the first weight looks complicated. We have to deal
- with the kind it is stored and with the fact that original
- form uses `unsigned int's while we need `uint32_t' here. */
- word = weights[0];
- obstack_grow (&non_simple, &word, sizeof (uint32_t));
- for (cnt = 0; cnt < weights[0]; ++cnt)
- {
- word = weights[collate->nrules + cnt];
- obstack_grow (&non_simple, &word, sizeof (uint32_t));
- }
- }
-
- if (hash_tab == &charset->char_table)
- hash_tab = &collate->elements;
- else if (hash_tab == &collate->elements)
- hash_tab = &collate->symbols;
- else
- break;
- }
-
- /* Now we have the complete tables. */
- if (obstack_object_size (&string_pool) % 4 != 0)
- obstack_blank (&non_simple, 4 - (obstack_object_size (&string_pool) % 4));
- symbols_string_pool_size = obstack_object_size (&string_pool);
- symbols_string_pool = obstack_finish (&string_pool);
-
- symbols_class_size = obstack_object_size (&non_simple);
- symbols_class = obstack_finish (&non_simple);
-
- /* Generate tables with other byte order. */
- symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
- * sizeof (uint32_t)));
- for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt)
- symbols_hash_tab_ob[cnt] = bswap_32 (symbols_hash_tab[cnt]);
-
- symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size);
- for (cnt = 0; cnt < symbols_class_size / 4; ++cnt)
- symbols_class_ob[cnt] = bswap_32 (symbols_class[cnt]);
-
-
- /* Store table addresses and lengths. */
-#if __BYTE_ORDER == __BIG_ENDIAN
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
- = table_best * level_best * entry_size * sizeof (table[0]);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
- = table_best * level_best * entry_size * sizeof (table[0]);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
-#else
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
- = table_best * level_best * entry_size * sizeof (table[0]);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
- = table_best * level_best * entry_size * sizeof (table[0]);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
-#endif
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (uint32_t);
-
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base
- = &element_hash_tab_size;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len
- = sizeof (uint32_t);
-
-#if __BYTE_ORDER == __BIG_ENDIAN
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
- = element_hash_tab;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
- = 2 * element_hash_tab_size * sizeof (uint32_t);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
- = element_hash_tab_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
- = 2 * element_hash_tab_size * sizeof (uint32_t);
-#else
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
- = element_hash_tab;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
- = 2 * element_hash_tab_size * sizeof (uint32_t);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
- = element_hash_tab_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
- = 2 * element_hash_tab_size * sizeof (uint32_t);
-#endif
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base
- = element_string_pool;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_len
- = element_string_pool_size;
-
-#if __BYTE_ORDER == __BIG_ENDIAN
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
- = element_value;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
- = element_value_size;
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
- = element_value_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
- = element_value_size;
-#else
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
- = element_value;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
- = element_value_size;
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
- = element_value_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
- = element_value_size;
-#endif
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base
- = &symbols_hash_tab_size;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len
- = sizeof (uint32_t);
-
-#if __BYTE_ORDER == __BIG_ENDIAN
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
- = symbols_hash_tab;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (uint32_t);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
- = symbols_hash_tab_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (uint32_t);
-#else
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
- = symbols_hash_tab;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (uint32_t);
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
- = symbols_hash_tab_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
- = 2 * symbols_hash_tab_size * sizeof (uint32_t);
-#endif
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base
- = symbols_string_pool;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_len
- = symbols_string_pool_size;
-
-#if __BYTE_ORDER == __BIG_ENDIAN
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
- = symbols_class;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
- = symbols_class_size;
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
- = symbols_class_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
- = symbols_class_size;
-#else
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
- = symbols_class;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
- = symbols_class_size;
-
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
- = symbols_class_ob;
- iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
- = symbols_class_size;
-#endif
-
- /* Update idx array. */
- idx[0] = iov[0].iov_len + iov[1].iov_len;
- for (cnt = 1; cnt < nelems; ++cnt)
- idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
-
- write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
-
- obstack_free (&non_simple, NULL);
- obstack_free (&string_pool, NULL);
-}
-
-
-/* Check whether the name carried by CODE may be used as the `to' name
-   of a new collating element.  The name is rejected if it is already
-     - a symbolic character name in CHARMAP,
-     - a symbolic name in REPERTOIRE,
-     - a collating element known to COLLATE, or
-     - a collating symbol known to COLLATE.
-   Every conflict is reported through lr_error on LDFILE.
-   Returns 1 if the name is already taken, 0 if it is free.  */
-static int
-collate_element_to (struct linereader *ldfile,
-                    struct locale_collate_t *collate,
-                    struct token *code, struct charmap_t *charmap,
-                    struct repertoire_t *repertoire)
-{
-  struct charseq *seq;
-  uint32_t value;
-  void *not_used;
-
-  /* The name must not collide with a character name of the charmap.  */
-  seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len);
-  if (seq != NULL)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates symbolic name in charmap"),
-                (int) code->val.str.len, code->val.str.start);
-      return 1;
-    }
-
-  /* Nor with a name from the repertoire map.  */
-  value = repertoire_find_value (repertoire, code->val.str.start,
-                                 code->val.str.len);
-  if (value != ILLEGAL_CHAR_VALUE)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates symbolic name in repertoire"),
-                (int) code->val.str.len, code->val.str.start);
-      return 1;
-    }
-
-  /* Nor with an already defined collating element.  */
-  if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
-                  &not_used) >= 0)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates other element definition"),
-                (int) code->val.str.len, code->val.str.start);
-      return 1;
-    }
-
-  /* Nor with an already defined collating symbol.  This lookup has to
-     consult the symbol table; searching the element table a second time
-     could never report a clash with a collating symbol.  */
-  if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
-                  &not_used) >= 0)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates symbol definition"),
-                (int) code->val.str.len, code->val.str.start);
-      return 1;
-    }
-
-  return 0;
-}
-
-/* Register a collating element: CODE carries the `from' string and
-   TO_STR is the `to' name under which the element is stored.
-
-   A new element_t is allocated from COLLATE->element_mem and receives
-   the multibyte and wide forms found in CODE->val.str (startmb and
-   startwc).  If both forms are NULL the definition is dropped; a
-   diagnostic is printed only when `verbose' is set.  Otherwise the
-   element is linked into COLLATE->resultwc (key: the first
-   sizeof (uint32_t) bytes of the wide name), then into
-   COLLATE->resultmb (key: the first byte of the multibyte name), and
-   finally stored in COLLATE->elements under TO_STR.
-
-   CHARMAP and REPERTOIRE are not referenced in this body.  */
-static void
-collate_element_from (struct linereader *ldfile,
- struct locale_collate_t *collate,
- const char *to_str, struct token *code,
- struct charmap_t *charmap,
- struct repertoire_t *repertoire)
-{
- element_t *elemp, *runp;
-
- /* CODE is a string.  Allocate the element record for it. */
- elemp = (element_t *) obstack_alloc (&collate->element_mem,
- sizeof (element_t));
-
- /* We have to translate the string. It may contain <...> character
- names.  Here the already translated forms stored in the token are
- taken over; no weight is assigned yet. */
- elemp->namemb = code->val.str.startmb;
- elemp->namewc = code->val.str.startwc;
- elemp->this_weight = 0;
- elemp->ordering = NULL;
- elemp->ordering_len = 0;
-
- if (elemp->namemb == NULL && elemp->namewc == NULL)
- {
- /* The string contains characters which are not in the charmap nor
- in the repertoire. Ignore the string. */
- if (verbose)
- lr_error (ldfile, _("\
-`from' string in collation element declaration contains unknown character"));
- return;
- }
-
- /* The entries in the linked lists of RESULT are sorting in
- descending order. The order is important for the `strcoll' and
- `wcscoll' functions.
- NOTE(review): the loop below walks past entries while the new name
- compares greater and inserts in front of the first entry that does
- not, which looks like ascending wcscmp order -- confirm which order
- the consumers expect. */
- if (find_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
- (void *) &runp) >= 0)
- {
- /* We already have an entry with this key. Check whether it is
- identical. */
- element_t *prevp = NULL;
- int cmpres;
-
- do
- {
- cmpres = wcscmp (elemp->namewc, runp->namewc);
- if (cmpres <= 0)
- break;
- prevp = runp;
- }
- while ((runp = runp->next) != NULL);
-
- if (cmpres == 0)
- lr_error (ldfile, _("\
-duplicate collating element definition (repertoire)"));
- else
- {
- elemp->next = runp;
- if (prevp == NULL)
- {
- /* New head of the chain: replace the hash table entry. */
- if (set_entry (&collate->resultwc, elemp->namewc,
- sizeof (uint32_t), elemp) < 0)
- error (EXIT_FAILURE, 0, _("\
-error while inserting collation element into hash table"));
- }
- else
- prevp->next = elemp;
- }
- }
- else
- {
- /* First element with this key. */
- elemp->next = NULL;
- if (insert_entry (&collate->resultwc, elemp->namewc, sizeof (uint32_t),
- elemp) < 0)
- error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
- }
-
- /* Now also insert the element definition in the multibyte table.
- NOTE(review): this chain uses the same ELEMP->next field as the wide
- character chain above, so the assignments below overwrite that link.
- Also, only the case of both names being NULL returned early, so
- namemb might still be NULL here -- confirm. */
- if (find_entry (&collate->resultmb, elemp->namemb, 1, (void *) &runp) >= 0)
- {
- /* We already have an entry with this key. Check whether it is
- identical. */
- element_t *prevp = NULL;
- int cmpres;
-
- do
- {
- cmpres = strcmp (elemp->namemb, runp->namemb);
- if (cmpres <= 0)
- break;
- prevp = runp;
- }
- while ((runp = runp->next) != NULL);
-
- if (cmpres == 0)
- lr_error (ldfile, _("\
-duplicate collating element definition (charmap)"));
- else
- {
- elemp->next = runp;
- if (prevp == NULL)
- {
- /* New head of the chain: replace the hash table entry. */
- if (set_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
- error (EXIT_FAILURE, 0, _("\
-error while inserting collation element into hash table"));
- }
- else
- prevp->next = elemp;
- }
- }
- else
- {
- /* First element with this key. */
- elemp->next = NULL;
- if (insert_entry (&collate->resultmb, elemp->namemb, 1, elemp) < 0)
- error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
- }
-
- /* Finally install the mapping from the `to'-name to the `from'-name.
- A failure here is only reported; it does not terminate the program. */
- if (insert_entry (&collate->elements, to_str, strlen (to_str),
- (void *) elemp) < 0)
- lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
- strerror (errno));
-}
-
-
-/* Define the collating symbol named by CODE.  The name is rejected,
-   with a diagnostic through lr_error on LDFILE, if it is already a
-   symbolic character name in CHARMAP, a symbolic name in REPERTOIRE,
-   a collating element of COLLATE, or a collating symbol of COLLATE.
-   Otherwise it is entered into COLLATE->symbols with the value 0,
-   which marks a symbol that has not yet been given a position in the
-   collation order.  */
-static void
-collate_symbol (struct linereader *ldfile, struct locale_collate_t *collate,
-                struct token *code, struct charmap_t *charmap,
-                struct repertoire_t *repertoire)
-{
-  uint32_t value;
-  struct charseq *seq;
-  void *not_used;
-
-  /* Look the name up in the charmap; this is the same lookup the
-     collating element check performs.  */
-  seq = charmap_find_value (charmap, code->val.str.start, code->val.str.len);
-  if (seq != NULL)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates symbolic name in charmap"),
-                (int) code->val.str.len, code->val.str.start);
-      return;
-    }
-
-  /* REPERTOIRE is a data pointer and cannot be called; the lookup
-     routine has to be used.  */
-  value = repertoire_find_value (repertoire, code->val.str.start,
-                                 code->val.str.len);
-  if (value != ILLEGAL_CHAR_VALUE)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates symbolic name in repertoire"),
-                (int) code->val.str.len, code->val.str.start);
-      return;
-    }
-
-  /* The name must not be a collating element ...  */
-  if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
-                  &not_used) >= 0)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates element definition"),
-                (int) code->val.str.len, code->val.str.start);
-      return;
-    }
-
-  /* ... nor an already defined collating symbol.  */
-  if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
-                  &not_used) >= 0)
-    {
-      lr_error (ldfile, _("symbol for multicharacter collating element "
-                          "`%.*s' duplicates other symbol definition"),
-                (int) code->val.str.len, code->val.str.start);
-      return;
-    }
-
-  /* Record the symbol.  A failure is reported but is not fatal.  */
-  if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
-                    (void *) 0) < 0)
-    lr_error (ldfile, _("cannot insert new collating symbol definition: %s"),
-              strerror (errno));
-}
-
-
-/* Append SORT_RULE to the list of sorting rules of LOCALE's LC_COLLATE
-   data.  The rule array is doubled in size whenever it is full.  LDFILE
-   is not used.  */
-void
-collate_new_order (struct linereader *ldfile, struct localedef_t *locale,
-                   enum coll_sort_rule sort_rule)
-{
-  struct locale_collate_t *coll = locale->categories[LC_COLLATE].collate;
-
-  if (! (coll->nrules < coll->nrules_max))
-    {
-      /* No room left: grow the array to twice its capacity.  */
-      coll->nrules_max *= 2;
-      coll->rules
-        = (enum coll_sort_rule *) xrealloc (coll->rules,
-                                            coll->nrules_max
-                                            * sizeof (enum coll_sort_rule));
-    }
-
-  /* Store the rule in the next free slot and count it.  */
-  coll->rules[coll->nrules] = sort_rule;
-  ++coll->nrules;
-}
-
-
-/* Called once the number of sorting rules is known: shrink the rule
-   array of LOCALE's LC_COLLATE data to exactly NRULES entries and
-   allocate the scratch arrays that collect the weights of one line.
-   LDFILE is not used.  */
-void
-collate_build_arrays (struct linereader *ldfile, struct localedef_t *locale)
-{
-  struct locale_collate_t *coll = locale->categories[LC_COLLATE].collate;
-
-  /* Trim the rule array to the number of rules actually seen.  */
-  coll->rules = (enum coll_sort_rule *)
-    xrealloc (coll->rules, coll->nrules * sizeof (enum coll_sort_rule));
-
-  /* One counter per rule for the weights collected so far.  */
-  coll->weight_cnt = (int *) xmalloc (coll->nrules * sizeof (int));
-
-  /* The weight buffer itself; five entries per rule is an arbitrary
-     starting size.  */
-  coll->nweight_max = 5 * coll->nrules;
-  coll->weight = (int *) xmalloc (coll->nweight_max * sizeof (int));
-}
-
-/* Process CODE, the first token of a line in the collation order.
-
-   Depending on CODE->tok the line starts the entry for a character
-   found in CHARSET, for a collating element, for a collating symbol,
-   for UNDEFINED, or it is an ellipsis line; the kind is recorded in
-   COLLATE->kind.  When COLLATE->was_ellipsis is set, the characters
-   from COLLATE->last_char + 1 up to the current character are then
-   entered into COLLATE->result, using the weights held in
-   COLLATE->weight and COLLATE->weight_cnt.  Finally the per-line weight
-   counters and the patch list are reset.
-
-   Returns 0 if the line was accepted and -1 if it was rejected.  A
-   failure to insert into a hash table terminates the program.
-
-   COLLATE->was_ellipsis and COLLATE->last_char are only read in this
-   function; nothing in this body assigns them.  */
-int
-collate_order_elem (struct linereader *ldfile, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
-{
- const uint32_t zero = L'\0';
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- int result = 0;
- uint32_t value; /* Assigned only in the tok_bsymbol case below. */
- void *tmp;
- unsigned int i;
-
- switch (code->tok)
- {
- case tok_bsymbol:
- /* We have a string to find in one of the three hashing tables. */
- value = charset_find_value (&charset->char_table, code->val.str.start,
- code->val.str.len);
- if (value != ILLEGAL_CHAR_VALUE)
- {
- element_t *lastp, *firstp;
-
- collate->kind = character;
-
- if (find_entry (&collate->result, &value, sizeof (uint32_t),
- (void *) &firstp) < 0)
- firstp = lastp = NULL;
- else
- {
- /* The entry for the simple character is always found at
- the end. */
- lastp = firstp;
- while (lastp->next != NULL)
- lastp = lastp->next;
-
- if (lastp->name[0] == value && lastp->name[1] == L'\0')
- {
- lr_error (ldfile,
- _("duplicate definition for character `%.*s'"),
- (int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (ldfile, 0);
- result = -1;
- break;
- }
- }
-
- /* Allocate the element, then build its two-word name (the
- character followed by a terminating zero) on the same obstack. */
- collate->current_element
- = (element_t *) obstack_alloc (&collate->element_mem,
- sizeof (element_t));
-
- obstack_grow (&collate->element_mem, &value, sizeof (value));
- obstack_grow (&collate->element_mem, &zero, sizeof (zero));
-
- collate->current_element->name =
- (const uint32_t *) obstack_finish (&collate->element_mem);
-
- collate->current_element->this_weight = ++collate->order_cnt;
-
- collate->current_element->next = NULL;
-
- /* Either start a new chain for this character or append the
- element to the end of the existing one. */
- if (firstp == NULL)
- {
- if (insert_entry (&collate->result, &value, sizeof (uint32_t),
- (void *) collate->current_element) < 0)
- {
- lr_error (ldfile, _("cannot insert collation element `%.*s'"),
- (int) code->val.str.len, code->val.str.start);
- exit (4);
- }
- }
- else
- lastp->next = collate->current_element;
- }
- else if (find_entry (&collate->elements, code->val.str.start,
- code->val.str.len, &tmp) >= 0)
- {
- collate->current_element = (element_t *) tmp;
-
- /* A nonzero weight means the element was already placed. */
- if (collate->current_element->this_weight != 0)
- {
- lr_error (ldfile, _("\
-collation element `%.*s' appears more than once: ignore line"),
- (int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (ldfile, 0);
- result = -1;
- break;
- }
-
- collate->kind = element;
- collate->current_element->this_weight = ++collate->order_cnt;
- }
- else if (find_entry (&collate->symbols, code->val.str.start,
- code->val.str.len, &tmp) >= 0)
- {
- /* Note that the order counter is advanced even if the symbol is
- rejected as a duplicate just below. */
- unsigned int order = ++collate->order_cnt;
-
- if ((unsigned long int) tmp != 0ul)
- {
- lr_error (ldfile, _("\
-collation symbol `%.*s' appears more than once: ignore line"),
- (int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (ldfile, 0);
- result = -1;
- break;
- }
-
- collate->kind = symbol;
-
- /* The position is stored directly in the table's data pointer. */
- if (set_entry (&collate->symbols, code->val.str.start,
- code->val.str.len, (void *) order) < 0)
- {
- lr_error (ldfile, _("cannot process order specification"));
- exit (4);
- }
- }
- else
- {
- if (verbose)
- lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
- (int) code->val.str.len, code->val.str.start);
- lr_ignore_rest (ldfile, 0);
-
- result = -1;
- }
- break;
-
- case tok_undefined:
- collate->kind = undefined;
- collate->current_element = &collate->undefined;
- break;
-
- case tok_ellipsis:
- if (collate->was_ellipsis)
- {
- lr_error (ldfile, _("\
-two lines in a row containing `...' are not allowed"));
- result = -1;
- }
- else if (collate->kind != character)
- {
- /* An ellipsis requires the previous line to be a
- character definition. */
- lr_error (ldfile, _("\
-line before ellipsis does not contain definition for character constant"));
- lr_ignore_rest (ldfile, 0);
- result = -1;
- }
- else
- collate->kind = ellipsis;
- break;
-
- default:
- assert (! "illegal token in `collate_order_elem'");
- }
-
- /* Now it's time to handle the ellipsis in the previous line. We do
- this only when the last line contained a definition for a
- character, the current line also defines a character, and the
- character code for the latter is not smaller than the former.
- NOTE(review): this block also runs when the switch above already
- set RESULT to -1; in that case COLLATE->kind may still hold the
- value of an earlier line and VALUE may be unset -- confirm. */
- if (collate->was_ellipsis)
- {
- if (collate->kind != character)
- {
- lr_error (ldfile, _("\
-line after ellipsis must contain character definition"));
- lr_ignore_rest (ldfile, 0);
- result = -1;
- }
- else if (collate->last_char > value)
- {
- lr_error (ldfile, _("end point of ellipsis range is bigger then start"));
- lr_ignore_rest (ldfile, 0);
- result = -1;
- }
- else
- {
- /* We can fill the arrays with the information we need. */
- uint32_t name[2];
- unsigned int *data;
- size_t *ptr;
- size_t cnt;
-
- name[0] = collate->last_char + 1;
- name[1] = L'\0';
-
- data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
- * sizeof (unsigned int));
- ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
-
- /* Prepare data. Because the characters covered by an
- ellipsis all have equal values we prepare the data once
- and only change the variable number (if there are any).
- PTR[...] will point to the entries which will have to be
- fixed during the output loop.
- DATA holds NRULES weight counts followed by NWEIGHT weights.
- NOTE(review): the first offset is NWEIGHT rather than NRULES;
- the two coincide only when every rule has exactly one weight
- -- confirm the intended indexing. */
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- {
- data[cnt] = collate->weight_cnt[cnt];
- ptr[cnt] = (cnt == 0
- ? collate->nweight
- : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
- }
-
- for (cnt = 0; cnt < collate->nweight; ++cnt)
- data[collate->nrules + cnt] = collate->weight[cnt];
-
- /* Keep only the offsets which refer to an ellipsis weight; an
- offset of zero means there is nothing to fix for that rule. */
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- if ((uint32_t) data[ptr[cnt]] != ELLIPSIS_CHAR)
- ptr[cnt] = 0;
-
- /* Create one element for every character of the range. */
- while (name[0] <= value)
- {
- element_t *pelem;
-
- pelem = (element_t *) obstack_alloc (&collate->element_mem,
- sizeof (element_t));
- pelem->name
- = (const uint32_t *) obstack_copy (&collate->element_mem,
- name, 2 * sizeof (uint32_t));
- pelem->this_weight = ++collate->order_cnt;
-
- pelem->ordering_len = collate->nweight;
- pelem->ordering
- = (unsigned int *) obstack_copy (&collate->element_mem, data,
- (collate->nrules
- + pelem->ordering_len)
- * sizeof (unsigned int));
-
- /* `...' weights need to be adjusted. */
- for (cnt = 0; cnt < collate->nrules; ++cnt)
- if (ptr[cnt] != 0)
- pelem->ordering[ptr[cnt]] = pelem->this_weight;
-
- /* Insert new entry into result table.  An existing chain for
- this character becomes the tail of the new element. */
- if (find_entry (&collate->result, name, sizeof (uint32_t),
- (void *) &pelem->next) >= 0)
- {
- if (set_entry (&collate->result, name, sizeof (uint32_t),
- (void *) pelem) < 0)
- error (4, 0, _("cannot insert into result table"));
- }
- else
- {
- pelem->next = NULL;
- if (insert_entry (&collate->result, name, sizeof (uint32_t),
- (void *) pelem) < 0)
- error (4, 0, _("cannot insert into result table"));
- }
-
- /* Increment counter. */
- ++name[0];
- }
- }
- }
-
- /* Reset counters for weights. */
- collate->weight_idx = 0;
- collate->nweight = 0;
- for (i = 0; i < collate->nrules; ++i)
- collate->weight_cnt[i] = 0;
- collate->current_patch = NULL;
-
- return result;
-}
-
-
-/* Add the weight named by the symbol in CODE to the weights collected
-   for the current line and the current rule.
-
-   The name is looked up as a character of CHARSET, then as a collating
-   element, then as a collating symbol.  If the named object has no
-   position in the collation order yet (its weight is 0), a patch record
-   carrying the file name, line number, token and weight index is queued
-   on COLLATE->current_patch instead of storing a weight.
-
-   Returns 0 on success.  Returns -1, after skipping the rest of the
-   line, if the symbol is unknown or if the current line defines a
-   collating symbol, which cannot carry weights.  */
-int
-collate_weight_bsymbol (struct linereader *ldfile, struct localedef_t *locale,
-                        struct token *code, struct charset_t *charset)
-{
-  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
-  unsigned int here_weight;
-  uint32_t value;
-  void *tmp;
-
-  assert (code->tok == tok_bsymbol);
-
-  value = charset_find_value (&charset->char_table, code->val.str.start,
-                              code->val.str.len);
-  if (value != ILLEGAL_CHAR_VALUE)
-    {
-      element_t *runp;
-
-      if (find_entry (&collate->result, &value, sizeof (uint32_t),
-                      (void *) &runp) < 0)
-        runp = NULL;
-
-      /* The chain also holds longer sequences starting with this
-         character; search for the entry of the single character.  */
-      while (runp != NULL
-             && (runp->name[0] != value || runp->name[1] != L'\0'))
-        runp = runp->next;
-
-      here_weight = runp == NULL ? 0 : runp->this_weight;
-    }
-  else if (find_entry (&collate->elements, code->val.str.start,
-                       code->val.str.len, &tmp) >= 0)
-    {
-      element_t *runp = (element_t *) tmp;
-
-      here_weight = runp->this_weight;
-    }
-  else if (find_entry (&collate->symbols, code->val.str.start,
-                       code->val.str.len, &tmp) >= 0)
-    {
-      /* The position of a symbol is kept in the data pointer itself.
-         Convert through `unsigned long int', as the other uses of this
-         table do, so that no pointer is narrowed directly.  */
-      here_weight = (unsigned int) (unsigned long int) tmp;
-    }
-  else
-    {
-      if (verbose)
-        lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
-                  (int) code->val.str.len, code->val.str.start);
-      lr_ignore_rest (ldfile, 0);
-      return -1;
-    }
-
-  /* When we currently work on a collation symbol we do not expect any
-     weight.  */
-  if (collate->kind == symbol)
-    {
-      lr_error (ldfile, _("\
-specification of sorting weight for collation symbol does not make sense"));
-      lr_ignore_rest (ldfile, 0);
-      return -1;
-    }
-
-  /* Add to the current collection of weights.  NWEIGHT_MAX counts
-     elements, so the new size has to be scaled by the element size;
-     passing the bare count would leave the buffer too small.  */
-  if (collate->nweight >= collate->nweight_max)
-    {
-      collate->nweight_max *= 2;
-      collate->weight
-        = (unsigned int *) xrealloc (collate->weight,
-                                     collate->nweight_max
-                                     * sizeof (collate->weight[0]));
-    }
-
-  /* If the weight is currently not known, we remember to patch the
-     resulting tables.  */
-  if (here_weight == 0)
-    {
-      patch_t *newp;
-
-      newp = (patch_t *) obstack_alloc (&collate->element_mem,
-                                        sizeof (patch_t));
-      newp->fname = ldfile->fname;
-      newp->lineno = ldfile->lineno;
-      newp->token = (const char *) obstack_copy0 (&collate->element_mem,
-                                                  code->val.str.start,
-                                                  code->val.str.len);
-      /* Reserve the slot; its content is not written here.  */
-      newp->where.idx = collate->nweight++;
-      newp->next = collate->current_patch;
-      collate->current_patch = newp;
-    }
-  else
-    collate->weight[collate->nweight++] = here_weight;
-
-  /* In both cases the current rule gained one weight.  */
-  ++collate->weight_cnt[collate->weight_idx];
-
-  return 0;
-}
-
-
-/* Advance to the weight list of the next sorting rule on the current
-   line.  Returns 0 on success.  Returns -1, after reporting the error
-   and skipping the rest of the line, if the line defines a collating
-   symbol or if more weight lists are given than rules exist.  */
-int
-collate_next_weight (struct linereader *ldfile, struct localedef_t *locale)
-{
-  struct locale_collate_t *coll = locale->categories[LC_COLLATE].collate;
-  int status = 0;
-
-  if (coll->kind == symbol)
-    {
-      /* Collating symbols carry no weights; the index is left alone.  */
-      lr_error (ldfile, _("\
-specification of sorting weight for collation symbol does not make sense"));
-      status = -1;
-    }
-  else
-    {
-      ++coll->weight_idx;
-      if (coll->weight_idx >= coll->nrules)
-        {
-          lr_error (ldfile, _("too many weights"));
-          status = -1;
-        }
-    }
-
-  /* Every rejected line is skipped up to its end.  */
-  if (status != 0)
-    lr_ignore_rest (ldfile, 0);
-
-  return status;
-}
-
-
-int
-collate_simple_weight (struct linereader *ldfile, struct localedef_t *locale,
- struct token *code, struct charset_t *charset)
-{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- unsigned int value = 0;
-
- /* There current tokens can be `IGNORE', `...', or a string. */
- switch (code->tok)
- {
- case tok_ignore:
- /* This token is allowed in all situations. */
- value = IGNORE_CHAR;
- break;
-
- case tok_ellipsis:
- /* The ellipsis is only allowed for the `...' or `UNDEFINED'
- entry. */
- if (collate->kind != ellipsis && collate->kind != undefined)
- {
- lr_error (ldfile, _("\
-`...' must only be used in `...' and `UNDEFINED' entries"));
- lr_ignore_rest (ldfile, 0);
- return -1;
- }
- value = ELLIPSIS_CHAR;
- break;
-
- case tok_string:
- /* This can become difficult. We have to get the weights which
- correspond to the single wide chars in the string. But some
- of the `chars' might not be real characters, but collation
- elements or symbols. And so the string decoder might have
- signaled errors. The string at this point is not translated.
- I.e., all <...> sequences are still there. */
- {
- char *runp = code->val.str.start;
- void *tmp;
-
- while (*runp != '\0')
- {
- char *startp = (char *) runp;
- char *putp = (char *) runp;
- uint32_t wch;
-
- /* Lookup weight for char and store it. */
- if (*runp == '<')
- {
- while (*++runp != '\0' && *runp != '>')
- {
- if (*runp == ldfile->escape_char)
- if (*++runp == '\0')
- {
- lr_error (ldfile, _("unterminated weight name"));
- lr_ignore_rest (ldfile, 0);
- return -1;
- }
- *putp++ = *runp;
- }
- if (*runp == '>')
- ++runp;
-
- if (putp == startp)
- {
- lr_error (ldfile, _("empty weight name: line ignored"));
- lr_ignore_rest (ldfile, 0);
- return -1;
- }
-
- wch = charset_find_value (&charset->char_table, startp,
- putp - startp);
- if (wch != ILLEGAL_CHAR_VALUE)
- {
- element_t *pelem;
-
- if (find_entry (&collate->result, &wch, sizeof (uint32_t),
- (void *)&pelem) < 0)
- pelem = NULL;
-
- while (pelem != NULL
- && (pelem->name[0] != wch
- || pelem->name[1] != L'\0'))
- pelem = pelem->next;
-
- value = pelem == NULL ? 0 : pelem->this_weight;
- }
- else if (find_entry (&collate->elements, startp, putp - startp,
- &tmp) >= 0)
- {
- element_t *pelem = (element_t *) tmp;
-
- value = pelem->this_weight;
- }
- else if (find_entry (&collate->symbols, startp, putp - startp,
- &tmp) >= 0)
- {
- value = (unsigned int) tmp;
- }
- else
- {
- if (verbose)
- lr_error (ldfile, _("unknown symbol `%.*s': line ignored"),
- (int) (putp - startp), startp);
- lr_ignore_rest (ldfile, 0);
- return -1;
- }
- }
- else
- {
- element_t *wp;
- uint32_t wch;
-
- if (*runp == ldfile->escape_char)
- {
- static const char digits[] = "0123456789abcdef";
- const char *dp;
- int base;
-
- ++runp;
- if (tolower (*runp) == 'x')
- {
- ++runp;
- base = 16;
- }
- else if (tolower (*runp) == 'd')
- {
- ++runp;
- base = 10;
- }
- else
- base = 8;
-
- dp = strchr (digits, tolower (*runp));
- if (dp == NULL || (dp - digits) >= base)
- {
- illegal_char:
- lr_error (ldfile, _("\
-illegal character constant in string"));
- lr_ignore_rest (ldfile, 0);
- return -1;
- }
- wch = dp - digits;
- ++runp;
-
- dp = strchr (digits, tolower (*runp));
- if (dp == NULL || (dp - digits) >= base)
- goto illegal_char;
- wch *= base;
- wch += dp - digits;
- ++runp;
-
- if (base != 16)
- {
- dp = strchr (digits, tolower (*runp));
- if (dp != NULL && (dp - digits < base))
- {
- wch *= base;
- wch += dp - digits;
- ++runp;
- }
- }
- }
- else
- wch = (uint32_t) *runp++;
-
- /* Lookup the weight for WCH. */
- if (find_entry (&collate->result, &wch, sizeof (wch),
- (void *)&wp) < 0)
- wp = NULL;
-
- while (wp != NULL
- && (wp->name[0] != wch || wp->name[1] != L'\0'))
- wp = wp->next;
-
- value = wp == NULL ? 0 : wp->this_weight;
-
- /* To get the correct name for the error message. */
- putp = runp;
-
- /**************************************************\
- |* I know here is something wrong. Characters in *|
- |* the string which are not in the <...> form *|
- |* cannot be declared forward for now!!! *|
- \**************************************************/
- }
-
- /* Store in weight array. */
- if (collate->nweight >= collate->nweight_max)
- {
- collate->nweight_max *= 2;
- collate->weight
- = (unsigned int *) xrealloc (collate->weight,
- collate->nweight_max);
- }
-
- if (value == 0)
- {
- patch_t *newp;
-
- newp = (patch_t *) obstack_alloc (&collate->element_mem,
- sizeof (patch_t));
- newp->fname = ldfile->fname;
- newp->lineno = ldfile->lineno;
- newp->token
- = (const char *) obstack_copy0 (&collate->element_mem,
- startp, putp - startp);
- newp->where.idx = collate->nweight++;
- newp->next = collate->current_patch;
- collate->current_patch = newp;
- }
- else
- collate->weight[collate->nweight++] = value;
- ++collate->weight_cnt[collate->weight_idx];
- }
- }
- return 0;
-
- default:
- assert (! "should not happen");
- }
-
-
- if (collate->nweight >= collate->nweight_max)
- {
- collate->nweight_max *= 2;
- collate->weight = (unsigned int *) xrealloc (collate->weight,
- collate->nweight_max);
- }
-
- collate->weight[collate->nweight++] = value;
- ++collate->weight_cnt[collate->weight_idx];
-
- return 0;
-}
-
-
-void
-collate_end_weight (struct linereader *ldfile, struct localedef_t *locale)
-{
- struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
- element_t *pelem = collate->current_element;
-
- if (collate->kind == symbol)
- {
- /* We don't have to do anything. */
- collate->was_ellipsis = 0;
- return;
- }
-
- if (collate->kind == ellipsis)
- {
- /* Before the next line is processed the ellipsis is handled. */
- collate->was_ellipsis = 1;
- return;
- }
-
- assert (collate->kind == character || collate->kind == element
- || collate->kind == undefined);
-
- /* Fill in the missing weights. */
- while (++collate->weight_idx < collate->nrules)
- {
- collate->weight[collate->nweight++] = pelem->this_weight;
- ++collate->weight_cnt[collate->weight_idx];
- }
-
- /* Now we know how many ordering weights the current
- character/element has. Allocate room in the element structure
- and copy information. */
- pelem->ordering_len = collate->nweight;
-
- /* First we write an array with the number of values for each
- weight. */
- obstack_grow (&collate->element_mem, collate->weight_cnt,
- collate->nrules * sizeof (unsigned int));
-
- /* Now the weights itselves. */
- obstack_grow (&collate->element_mem, collate->weight,
- collate->nweight * sizeof (unsigned int));
-
- /* Get result. */
- pelem->ordering = obstack_finish (&collate->element_mem);
-
- /* Now we handle the "patches". */
- while (collate->current_patch != NULL)
- {
- patch_t *this_patch;
-
- this_patch = collate->current_patch;
-
- this_patch->where.pos = &pelem->ordering[collate->nrules
- + this_patch->where.idx];
-
- collate->current_patch = this_patch->next;
- this_patch->next = collate->all_patches;
- collate->all_patches = this_patch;
- }
-
- /* Set information for next round. */
- collate->was_ellipsis = 0;
- if (collate->kind != undefined)
- collate->last_char = pelem->name[0];
-}
-
-
-/* The parser for the LC_CTYPE section of the locale definition. */
-void
-read_lc_collate (struct linereader *ldfile, struct localedef_t *result,
- struct charmap_t *charmap, struct repertoire_t *repertoire,
- int ignore_content)
-{
- struct locale_collate_t *collate;
- int did_copy = 0;
- const char *save_str;
-
- /* The rest of the line containing `LC_COLLATE' must be free. */
- lr_ignore_rest (ldfile, 1);
-
- now = lr_token (ldfile, charmap, NULL);
- nowtok = now->tok;
-
- /* If we see `copy' now we are almost done. */
- if (nowtok == tok_copy)
- {
- handle_copy (ldfile, charmap, repertoire, result, tok_lc_collate,
- LC_COLLATE, "LC_COLLATE", ignore_content);
- did_copy = 1;
- }
-
- /* Prepare the data structures. */
- collate_startup (ldfile, result, charmap, ignore_content);
- collate = result->categories[LC_COLLATE].collate;
-
- while (1)
- {
- /* Of course we don't proceed beyond the end of file. */
- if (nowtok == tok_eof)
- break;
-
- /* Ignore empty lines. */
- if (nowtok == tok_eol)
- {
- now = lr_token (ldfile, charmap, NULL);
- nowtok = now->tok;
- continue;
- }
-
- switch (nowtok)
- {
- case tok_coll_weight_max:
- if (did_copy)
- goto err_label;
- /* The rest of the line must be a single integer value. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok != tok_number)
- goto err_label;
- /* We simply forget about the value we just read, the implementation
- has no fixed limits. */
- lr_ignore_rest (ldfile, 1);
- break;
-
- case tok_script:
- if (did_copy)
- goto err_label;
- /* We expect the name of the script in brackets. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
- goto err_label;
- if (now->tok != tok_bsymbol)
- {
- lr_error (ldfile, _("\
-script name `%s' must not duplicate any known name"),
- tok->val.str.startmb);
- lr_ignore_rest (ldfile, 0);
- break;
- }
- collate->scripts = xmalloc (collate->scripts,
- (collate->nscripts
- * sizeof (const char *)));
- collate->scripts[collate->nscripts++] = tok->val.str.startmb;
- lr_ignore_rest (ldfile, 1);
- break;
-
- case tok_collating_element:
- if (did_copy)
- goto err_label;
- /* Get the first argument, a symbol in brackets. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok != tok_bsymbol)
- goto err_label;
- /* Test it. */
- if (collate_element_to (ldfile, collate, now, charmap, repertoire))
- {
- /* An error occurred. */
- lr_ignore_rest (ldfile, 0);
- break;
- }
- save_str = tok->val.str.startmb;
- /* Next comes `from'. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok != tok_from)
- goto err_label;
- /* Now comes a string. */
- now = lr_token (ldfile, charmap, repertoire);
- if (now->tok != tok_string)
- goto err_label;
- collate_element_from (ldfile, collate, save_str, now, charmap,
- repertoire);
- /* The rest of the line should be empty. */
- lr_ignore_rest (ldfile, 1);
- break;
-
- case tok_collating_symbol:
- if (did_copy)
- goto err_label;
- /* Get the argument, a single symbol in brackets. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok != tok_bsymbol)
- goto err_label;
- collate_symbol (ldfile, collate, now, charmap, repertoire);
- break;
-
- case tok_order_start:
- if (did_copy)
- goto err_label;
-
- /* We expect now a scripting symbol or start right away
- with the order keywords. Or we have no argument at all
- in which means `forward'. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok == tok_eol)
- {
- static enum coll_sort_rule default_rule = sort_forward;
- /* Use a single `forward' rule. */
- collate->nrules = 1;
- collate->rules = &default_rule;
- }
- else
- {
- /* XXX We don't recognize the ISO 14651 extensions yet. */
- uint32_t nrules = 0;
- uint32_t nrules_max = 32;
- enum coll_sort_rule *rules = alloca (nrules_max
- * sizeof (*rules));
- int saw_semicolon = 0;
-
- memset (rules, '\0', nrules_max * sizeof (*rules));
- do
- {
- if (now->tok != tok_forward && now->tok != tok_backward
- && now->tok != tok_position)
- goto err_label;
-
- if (saw_semicolon)
- {
- if (nrules == nrules_max)
- {
- newp = alloca (nrules_max * 2 * sizeof (*rules));
- rules = memcpy (newp, rules,
- nrules_max * sizeof (*rules));
- memset (&rules[nrules_max], '\0',
- nrules_max * sizeof (*rules));
- nrules_max *= 2;
- }
- ++nrules;
- }
-
- switch (now->tok)
- {
- case tok_forward:
- if ((rules[nrules] & sort_backward) != 0)
- {
- lr_error (ldfile, _("\
-`forward' and `backward' order exclude each other"));
- lr_ignore_rest (ldfile, 0);
- goto error_sort;
- }
- rules[nrules] |= sort_forward;
- break;
- case tok_backward:
- if ((rules[nrules] & sort_forward) != 0)
- {
- lr_error (ldfile, _("\
-`forward' and `backward' order exclude each other"));
- lr_ignore_rest (ldfile, 0);
- goto error_sort;
- }
- rules[nrules] |= sort_backward;
- break;
- case tok_position:
- rules[nrules] |= tok_position;
- break;
- }
-
- /* Get the next token. This is either the end of the line,
- a comma or a semicolon. */
- now = lr_token (ldfile, charmap, NULL);
- if (now->tok == tok_comma || now->tok == tok_semicolon)
- {
- saw_semicolon = now->tok == tok_semicolon;
- now = lr_token (ldfile, charmap, NULL);
- }
- }
- while (now->tok != tok_eol || now->tok != tok_eof);
-
- error_sort:
- collate->nrules = nrules;
- collate->rules = memcpy (xmalloc (nrules * sizeof (*rules)),
- rules, nrules * sizeof (*rules));
- }
-
- /* Now read the rules. */
- read_rules (ldfile, collate, charmap, repertoire);
- break;
-
- case tok_reorder_after:
- break;
-
- case tok_reorder_script_after:
- break;
-
- default:
- err_label:
- if (now->tok != tok_eof)
- SYNTAX_ERROR (_("syntax error in %s locale definition"),
- "LC_COLLATE");
- }
-
- /* Prepare for the next round. */
- now = lr_token (ldfile, charmap, NULL);
- nowtok = now->tok;
- }
-
- /* When we come here we reached the end of the file. */
- lr_error (ldfile, _("premature end of file while reading category `%s'"),
- "LC_COLLATE");
-}
-
-#endif