aboutsummaryrefslogtreecommitdiff
path: root/locale/programs/ld-collate.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1999-12-18 19:45:25 +0000
committerUlrich Drepper <drepper@redhat.com>1999-12-18 19:45:25 +0000
commitb85697f61d8aeeaeb8b91d50ab2c668b7fcbbd8a (patch)
tree6aec765390be55804ecbe7c1bde6c79dda2e1dc2 /locale/programs/ld-collate.c
parent440a52ea7b427b7a5668c77283825cae20d7fc3c (diff)
downloadglibc-b85697f61d8aeeaeb8b91d50ab2c668b7fcbbd8a.tar
glibc-b85697f61d8aeeaeb8b91d50ab2c668b7fcbbd8a.tar.gz
glibc-b85697f61d8aeeaeb8b91d50ab2c668b7fcbbd8a.tar.bz2
glibc-b85697f61d8aeeaeb8b91d50ab2c668b7fcbbd8a.zip
Update.
1999-12-17 Ulrich Drepper <drepper@cygnus.com> * string/bits/string2.h (__strtok_r_1c): Help gcc optimizing string access. * locale/programs/ld-collate.c: Implement handling of absolute ellipsis. Parsing of file and constructing the internal data structures should now be complete. (collate_finish): Start adding support to generate the data structures which are written out to the file. * intl/dcgettext.c: Rewrite to handle caching of previous results here instead of in the dcgettext macro. * intl/libintl.h (dcgettext): Don't define for systems using this glibc or systems with tsearch. * sysdeps/generic/mathdef.h: Protect definitions for math.h against double inclusion. * sysdeps/alpha/fpu/bits/mathdef.h: Likewise. * sysdeps/i386/fpu/bits/mathdef.h: Likewise. * sysdeps/m68k/fpu/bits/mathdef.h: Likewise. * sysdeps/powerpc/fpu/bits/mathdef.h: Likewise. * sysdeps/i386/fpu/libm-test-ulps: Add more deltas (are mobile PIIs that different?). 1999-12-17 Andreas Jaeger <aj@suse.de> * rt/aio.h (struct aiocb64): Add member __next_prio to sync the struct with aiocb. * rt/Makefile (tests): Added tst-aio64. Added dependency rules for tst-aio64. * rt/tst-aio64.c: New file, copied from tst-aio.c and changed for 64bit tests. 1999-12-15 Thorsten Kukuk <kukuk@suse.de> * sysdeps/unix/sysv/linux/alpha/oldgetrlimit64.c: Removed. * sysdeps/unix/sysv/linux/alpha/oldsetrlimit64.c: Removed. * sysdeps/unix/sysv/linux/bits/resource.h: Change RLIM_INFINITY back to old value (signed long). * sysdeps/unix/sysv/linux/i386/bits/resource.h: New, with unsigned long RLIM_INFINITY. * sysdeps/unix/sysv/linux/getrlimit.c: Moved from here to ... * sysdeps/unix/sysv/linux/i386/getrlimit.c: ... here. * sysdeps/unix/sysv/linux/getrlimit64.c: Moved from here to ... * sysdeps/unix/sysv/linux/i386/getrlimit64.c: ... here. * sysdeps/unix/sysv/linux/oldgetrlimit64.c: Moved from here to ... * sysdeps/unix/sysv/linux/i386/oldgetrlimit64.c: ... here. * sysdeps/unix/sysv/linux/oldsetrlimit64.c: Moved from here to ... * sysdeps/unix/sysv/linux/i386/oldsetrlimit64.c: ... here. * sysdeps/unix/sysv/linux/setrlimit.c: Moved from here to ... * sysdeps/unix/sysv/linux/i386/setrlimit.c: ... here. * sysdeps/unix/sysv/linux/setrlimit64.c: Moved from here to ... * sysdeps/unix/sysv/linux/i386/setrlimit64.c: ... here. * sysdeps/unix/sysv/linux/sparc/bits/resource.h: New. * sysdeps/unix/sysv/linux/sparc/sparc64/oldgetrlimit64.c: Removed. * sysdeps/unix/sysv/linux/sparc/sparc64/oldsetrlimit64.c: Removed. 1999-12-17 Andreas Jaeger <aj@suse.de> * elf/ldconfig.c: Add new option -l to manualy link shared libraries. (options): Added option. (parse_opt): Set option. (main): Handle option. (manual_link): New function. 1999-12-17 Thorsten Kukuk <kukuk@suse.de> * string/bits/string2.h: Fix patch from 1999-12-07. 1999-12-16 Ulrich Drepper <drepper@cygnus.com> * sysdeps/generic/strsep.c: If delim string has only one character don't run over end of string. * locale/programs/ld-collate.c (insert_weights): Also update next pointer of last cursor element. (insert_value): Return nonzero value if nothing got inserted. (handle_ellipsis): Don't do anything if to-value cannot be inserted. 1999-12-10 Jakub Jelinek <jakub@redhat.com> * stdlib/longlong.h (__sparc_v9__): Use %rDIGIT instead of %DIGIT where appropriate. 1999-12-10 Jakub Jelinek <jakub@redhat.com> * sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c (__sigaction): Copy sa_flags into kernel sigaction structure. 1999-12-14 Andreas Jaeger <aj@suse.de> * string/tester.c (test_strsep): More tests for access beyond the final NUL. The first two tests come from PR libc/1486 by martinea@iro.umontreal.ca. 1999-12-14 Thorsten Kukuk <kukuk@suse.de> * nis/ypclnt.c: Correct handling of cached client handles. (__xdr_ypresp_all): Call callback function for errors, too, like Solaris does. * nis/nss_compat/compat-grp.c: Make sure errno is always set correct. * nis/nss_compat/compat-initgroups.c: Likewise. * nis/nss_compat/compat-spwd.c: Likewise. * nis/nss_nis/nis-alias.c: Likewise. * nis/nss_nis/nis-ethers.c: Likewise. * nis/nss_nis/nis-grp.c: Likewise. * nis/nss_nis/nis-hosts.c: Likewise. * nis/nss_nis/nis-netgrp.c: Likewise. * nis/nss_nis/nis-publickey.c: Likewise. * nis/nss_nis/nis-service.c: Likewise. Also use services.byservicename Map if available, optimize query if name/port and protocol is known.
Diffstat (limited to 'locale/programs/ld-collate.c')
-rw-r--r--locale/programs/ld-collate.c317
1 files changed, 288 insertions, 29 deletions
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 42fd601064..87005e86ab 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -73,7 +73,8 @@ struct element_t
const char *mbs;
const uint32_t *wcs;
- int order;
+ int mborder;
+ int wcorder;
struct element_list_t *weights;
@@ -87,6 +88,9 @@ struct element_t
/* Predecessor and successor in the order list. */
struct element_t *last;
struct element_t *next;
+
+ /* Next element in multibyte output list. */
+ struct element_t *mbnext;
};
/* Special element value. */
@@ -151,6 +155,10 @@ struct locale_collate_t
that the definitions from more than one input file contains information.
Therefore we keep all relevant input in a list. */
struct locale_collate_t *next;
+
+ /* Arrays with heads of the list for each of the leading bytes in
+ the multibyte sequences. */
+ struct element_t *mbheads[256];
};
@@ -176,7 +184,7 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string,
static struct element_t *
-new_element (struct locale_collate_t *collate, const char *mbs,
+new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
const uint32_t *wcs, const char *name, size_t namelen)
{
struct element_t *newp;
@@ -185,7 +193,10 @@ new_element (struct locale_collate_t *collate, const char *mbs,
sizeof (*newp));
newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
name, namelen);
- newp->mbs = mbs;
+ if (mbs != NULL)
+ newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
+ else
+ newp->mbs = NULL;
if (wcs != NULL)
{
size_t nwcs = wcslen ((wchar_t *) wcs) + 1;
@@ -196,7 +207,8 @@ new_element (struct locale_collate_t *collate, const char *mbs,
}
else
newp->wcs = NULL;
- newp->order = 0;
+ newp->mborder = 0;
+ newp->wcorder = 0;
/* Will be allocated later. */
newp->weights = NULL;
@@ -209,6 +221,8 @@ new_element (struct locale_collate_t *collate, const char *mbs,
newp->last = NULL;
newp->next = NULL;
+ newp->mbnext = NULL;
+
return newp;
}
@@ -457,14 +471,15 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
result = sym->order;
if (result == NULL)
- result = sym->order = new_element (collate, NULL, NULL, NULL, 0);
+ result = sym->order = new_element (collate, NULL, 0, NULL,
+ NULL, 0);
}
else if (find_entry (&collate->elem_table, str, len,
(void **) &result) != 0)
{
/* It's also no collation element. So it is an character
element defined later. */
- result = new_element (collate, NULL, NULL, str, len);
+ result = new_element (collate, NULL, 0, NULL, str, len);
if (result != NULL)
/* Insert it into the sequence table. */
insert_entry (&collate->seq_table, str, len, result);
@@ -499,6 +514,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
elem->line = ldfile->lineno;
elem->last = collate->cursor;
elem->next = collate->cursor ? collate->cursor->next : NULL;
+ if (collate->cursor != NULL)
+ collate->cursor->next = elem;
elem->weights = (struct element_list_t *)
obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
@@ -683,7 +700,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
}
-static void
+static int
insert_value (struct linereader *ldfile, struct token *arg,
struct charmap_t *charmap, struct repertoire_t *repertoire,
struct locale_collate_t *collate)
@@ -721,14 +738,14 @@ insert_value (struct linereader *ldfile, struct token *arg,
elem = sym->order;
if (elem == NULL)
- elem = sym->order = new_element (collate, NULL, NULL, NULL, 0);
+ elem = sym->order = new_element (collate, NULL, 0, NULL, NULL, 0);
}
else if (find_entry (&collate->elem_table, arg->val.str.startmb,
arg->val.str.lenmb, (void **) &elem) != 0)
{
/* It's also no collation element. Therefore ignore it. */
lr_ignore_rest (ldfile, 0);
- return;
+ return 1;
}
}
else
@@ -741,6 +758,7 @@ insert_value (struct linereader *ldfile, struct token *arg,
/* We have to allocate an entry. */
elem = new_element (collate, seq != NULL ? seq->bytes : NULL,
+ seq != NULL ? seq->nbytes : 0,
wcs, arg->val.str.startmb, arg->val.str.lenmb);
/* And add it to the table. */
@@ -755,14 +773,16 @@ insert_value (struct linereader *ldfile, struct token *arg,
if (elem->next != NULL || (collate->cursor != NULL
&& elem->next == collate->cursor))
{
- lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
+ lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
arg->val.str.lenmb, arg->val.str.startmb,
elem->file, elem->line);
lr_ignore_rest (ldfile, 0);
- return;
+ return 1;
}
insert_weights (ldfile, elem, charmap, repertoire, collate, tok_none);
+
+ return 0;
}
@@ -780,8 +800,11 @@ handle_ellipsis (struct linereader *ldfile, struct token *arg,
startp = collate->cursor;
/* Process and add the end-entry. */
- if (arg != NULL)
- insert_value (ldfile, arg, charmap, repertoire, collate);
+ if (arg != NULL
+ && insert_value (ldfile, arg, charmap, repertoire, collate))
+ /* Something went wrong with inserting the to-value. This means
+ we cannot process the ellipsis. */
+ return;
/* Reset the cursor. */
collate->cursor = startp;
@@ -805,7 +828,168 @@ handle_ellipsis (struct linereader *ldfile, struct token *arg,
if (ellipsis == tok_ellipsis3)
{
- /* XXX */
+ /* One requirement we make here: the length of the byte
+ sequences for the first and end character must be the same.
+ This is mainly to prevent unwanted effects and this is often
+ not what is wanted. */
+ size_t len = (startp->mbs != NULL ? strlen (startp->mbs)
+ : (endp->mbs != NULL ? strlen (endp->mbs) : 0));
+ char mbcnt[len + 1];
+ char mbend[len + 1];
+
+ /* Well, this should be caught somewhere else already. Just to
+ make sure. */
+ assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
+ assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
+
+ if (startp != NULL && endp != NULL
+ && startp->mbs != NULL && endp->mbs != NULL
+ && strlen (startp->mbs) != strlen (endp->mbs))
+ {
+ lr_error (ldfile, _("\
+%s: byte sequences of first and last character must have the same length"),
+ "LC_COLLATE");
+ return;
+ }
+
+ /* Determine whether we have to generate multibyte sequences. */
+ if ((startp == NULL || startp->mbs != NULL)
+ && (endp == NULL || endp->mbs != NULL))
+ {
+ int cnt;
+ int ret;
+
+ /* Prepare the beginning byte sequence. This is either from the
+ beginning byte sequence or it is all nulls if it was an
+ initial ellipsis. */
+ if (startp == NULL || startp->mbs == NULL)
+ memset (mbcnt, '\0', len);
+ else
+ {
+ memcpy (mbcnt, startp->mbs, len);
+
+ /* And increment it so that the value is the first one we will
+ try to insert. */
+ for (cnt = len - 1; cnt >= 0; --cnt)
+ if (++mbcnt[cnt] != '\0')
+ break;
+ }
+ mbcnt[len] = '\0';
+
+ /* And the end sequence. */
+ if (endp == NULL || endp->mbs == NULL)
+ memset (mbend, '\0', len);
+ else
+ memcpy (mbend, endp->mbs, len);
+ mbend[len] = '\0';
+
+ /* Test whether we have a correct range. */
+ ret = memcmp (mbcnt, mbend, len);
+ if (ret >= 0)
+ {
+ if (ret > 0)
+ lr_error (ldfile, _("%s: byte sequence of first character of \
+sequence is not lower than that of the last character"), "LC_COLLATE");
+ return;
+ }
+
+ /* Generate the byte sequences data. */
+ while (1)
+ {
+ struct charseq *seq;
+
+ /* Quite a bit of work ahead. We have to find the character
+ definition for the byte sequence and then determine the
+ wide character belonging to it. */
+ seq = charmap_find_symbol (charmap, mbcnt, len);
+ if (seq != NULL)
+ {
+ struct element_t *elem;
+ size_t namelen;
+
+ if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
+ seq->ucs4 = repertoire_find_value (repertoire, seq->name,
+ strlen (seq->name));
+
+ /* I don't this this can ever happen. */
+ assert (seq->name != NULL);
+ namelen = strlen (seq->name);
+
+ /* Now we are ready to insert the new value in the
+ sequence. Find out whether the element is
+ already known. */
+ if (find_entry (&collate->seq_table, seq->name, namelen,
+ (void **) &elem) != 0)
+ {
+ uint32_t wcs[2] = { seq->ucs4, 0 };
+
+ /* We have to allocate an entry. */
+ elem = new_element (collate, mbcnt, len, wcs, seq->name,
+ namelen);
+
+ /* And add it to the table. */
+ if (insert_entry (&collate->seq_table, seq->name,
+ namelen, elem) != 0)
+ /* This cannot happen. */
+ assert (! "Internal error");
+ }
+
+ /* Test whether this element is not already in the list. */
+ if (elem->next != NULL || (collate->cursor != NULL
+ && elem->next == collate->cursor))
+ {
+ lr_error (ldfile, _("\
+order for `%.*s' already defined at %s:%zu"),
+ namelen, seq->name, elem->file, elem->line);
+ goto increment;
+ }
+
+ /* Enqueue the new element. */
+ elem->last = collate->cursor;
+ elem->next = collate->cursor->next;
+ elem->last->next = elem;
+ if (elem->next != NULL)
+ elem->next->last = elem;
+ collate->cursor = elem;
+
+ /* Add the weight value. We take them from the
+ `ellipsis_weights' member of `collate'. */
+ elem->weights = (struct element_list_t *)
+ obstack_alloc (&collate->mempool,
+ nrules * sizeof (struct element_list_t));
+ for (cnt = 0; cnt < nrules; ++cnt)
+ if (collate->ellipsis_weight.weights[cnt].cnt == 1
+ && (collate->ellipsis_weight.weights[cnt].w[0]
+ == ELEMENT_ELLIPSIS2))
+ {
+ elem->weights[cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool,
+ sizeof (struct element_t *));
+ elem->weights[cnt].w[0] = elem;
+ elem->weights[cnt].cnt = 1;
+ }
+ else
+ {
+ /* Simly use the weight from `ellipsis_weight'. */
+ elem->weights[cnt].w =
+ collate->ellipsis_weight.weights[cnt].w;
+ elem->weights[cnt].cnt =
+ collate->ellipsis_weight.weights[cnt].cnt;
+ }
+ }
+
+ /* Increment for the next round. */
+ increment:
+ for (cnt = len - 1; cnt >= 0; --cnt)
+ if (++mbcnt[cnt] != '\0')
+ break;
+
+ /* Find out whether this was all. */
+ if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
+ /* Yep, that's all. */
+ break;
+ }
+ }
}
else
{
@@ -883,7 +1067,7 @@ handle_ellipsis (struct linereader *ldfile, struct token *arg,
&& elem->next == collate->cursor))
{
lr_error (ldfile, _("\
-%s: order for `%.*s' already defined at %s:%Z"),
+%s: order for `%.*s' already defined at %s:%zu"),
"LC_COLLATE", lenfrom, buf,
elem->file, elem->line);
continue;
@@ -923,6 +1107,7 @@ handle_ellipsis (struct linereader *ldfile, struct token *arg,
/* We have to allocate an entry. */
elem = new_element (collate,
seq != NULL ? seq->bytes : NULL,
+ seq != NULL ? seq->nbytes : 0,
wc == ILLEGAL_CHAR_VALUE
? NULL : wcs,
buf, lenfrom);
@@ -1023,6 +1208,67 @@ collate_startup (struct linereader *ldfile, struct localedef_t *locale,
void
collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
{
+ /* Now is the time when we can assign the individual collation
+ values for all the symbols. We have possibly different values
+ for the wide- and the multibyte-character symbols. This is done
+ since it might make a difference in the encoding if there is in
+ some cases no multibyte-character but there are wide-characters.
+ (The other way around it is not important since theencoded
+ collation value in the wide-character case is 32 bits wide and
+ therefore requires no encoding).
+
+ The lowest collation value assigned is 2. Zero is reserved for
+ the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
+ functions and 1 is used to separate the individual passes for the
+ different rules.
+
+ We also have to construct is list with all the bytes/words which
+ can come first in a sequence, followed by all the elements which
+ also start with this byte/word. The order is reverse which has
+ among others the important effect that longer strings are located
+ first in the list. This is required for the output data since
+ the algorithm used in `strcoll' etc depends on this.
+
+ The multibyte case is easy. We simply sort into an array with
+ 256 elements. */
+ struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+ int mbact = 2;
+ int wcact = 2;
+ struct element_t *runp = collate->start;
+
+ while (runp != NULL)
+ {
+ if (runp->mbs != NULL)
+ {
+ struct element_t **eptr;
+
+ /* Determine the order. */
+ runp->mborder = mbact++;
+
+ /* Find the point where to insert in the list. */
+ eptr = &collate->mbheads[(unsigned int) runp->mbs[0]];
+ while (*eptr != NULL)
+ {
+ /* Check which string is larger, the one we want to insert
+ or the current element of the list we are looking at. */
+ assert (runp->mbs[0] == (*eptr)->mbs[0]);
+ if (strcmp (runp->mbs, (*eptr)->mbs) > 0)
+ break;
+
+ eptr = &(*eptr)->mbnext;
+ }
+
+ /* Set the pointers. */
+ runp->mbnext = *eptr;
+ *eptr = runp;
+ }
+
+ if (runp->wcs != NULL)
+ runp->wcorder = wcact++;
+
+ /* Up to the next entry. */
+ runp = runp->next;
+ }
}
@@ -1257,7 +1503,8 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
if (insert_entry (&collate->elem_table,
symbol, symbol_len,
new_element (collate,
- NULL, NULL, NULL, 0)) < 0)
+ NULL, 0, NULL, symbol,
+ symbol_len)) < 0)
lr_error (ldfile, _("\
error while adding collating element"));
}
@@ -1519,9 +1766,12 @@ error while adding equivalent collating symbol"));
goto err_label;
/* Handle ellipsis at end of list. */
- if (was_ellipsis)
- /* XXX */
- abort ();
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, NULL, was_ellipsis, charmap, repertoire,
+ collate);
+ was_ellipsis = tok_none;
+ }
state = 2;
lr_ignore_rest (ldfile, 1);
@@ -1543,9 +1793,12 @@ error while adding equivalent collating symbol"));
state = 2;
/* Handle ellipsis at end of list. */
- if (was_ellipsis)
- /* XXX */
- abort ();
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, arg, was_ellipsis, charmap,
+ repertoire, collate);
+ was_ellipsis = tok_none;
+ }
}
else if (state != 2 && state != 3)
goto err_label;
@@ -1610,9 +1863,12 @@ error while adding equivalent collating symbol"));
state = 2;
/* Handle ellipsis at end of list. */
- if (was_ellipsis)
- /* XXX */
- abort ();
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
+ repertoire, collate);
+ was_ellipsis = tok_none;
+ }
}
else if (state == 3)
{
@@ -1848,7 +2104,7 @@ error while adding equivalent collating symbol"));
&& collate->undefined.next == collate->cursor))
{
lr_error (ldfile,
- _("%s: order for `%.*s' already defined at %s:%Z"),
+ _("%s: order for `%.*s' already defined at %s:%zu"),
"LC_COLLATE", 9, "UNDEFINED", collate->undefined.file,
collate->undefined.line);
lr_ignore_rest (ldfile, 0);
@@ -1892,9 +2148,12 @@ error while adding equivalent collating symbol"));
"LC_COLLATE");
/* Handle ellipsis at end of list. */
- if (was_ellipsis)
- /* XXX */
- abort ();
+ if (was_ellipsis != tok_none)
+ {
+ handle_ellipsis (ldfile, NULL, was_ellipsis, charmap,
+ repertoire, collate);
+ was_ellipsis = tok_none;
+ }
}
else if (state == 3)
error (0, 0, _("%s: missing `reorder-end' keyword"),