aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--locale/programs/ld-collate.c337
-rw-r--r--localedata/ChangeLog4
-rw-r--r--localedata/locales/de_DE16
4 files changed, 292 insertions, 70 deletions
diff --git a/ChangeLog b/ChangeLog
index 83dc270de7..dcab3bd634 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+1999-12-10 Ulrich Drepper <drepper@cygnus.com>
+
+ * locale/programs/ld-collate.c: Many changes to implement parsing
+ of collation definition (still not complete).
+
1999-12-09 Andreas Jaeger <aj@suse.de>
* nis/nss_compat/compat-pwd.c (internal_getpwuid_r): Always set
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 2858f641d6..ae689e9122 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -54,6 +54,16 @@ struct section_list
enum coll_sort_rule *rules;
};
+struct element_t;
+
+struct element_list_t
+{
+ /* List of collating elements making up one weight. CNT is the
+ number of elements and W points to the array of element pointers.
+ insert_weights stores a single null pointer in W together with a
+ CNT of zero for a weight given as IGNORE. */
+ int cnt;
+
+ struct element_t **w;
+};
+
/* Data type for collating element. */
struct element_t
{
@@ -61,7 +71,7 @@ struct element_t
const uint32_t *wcs;
int order;
- struct element_t **weights;
+ struct element_list_t *weights;
/* Where does the definition come from. */
const char *file;
@@ -158,16 +168,19 @@ make_seclist_elem (struct locale_collate_t *collate, const char *string,
static struct element_t *
new_element (struct locale_collate_t *collate, const char *mbs,
- const uint32_t *wcs)
+ size_t len, const uint32_t *wcs)
{
struct element_t *newp;
newp = (struct element_t *) obstack_alloc (&collate->mempool,
sizeof (*newp));
- newp->mbs = mbs;
+ newp->mbs = obstack_copy0 (&collate->mempool, mbs, len);
newp->wcs = wcs;
newp->order = 0;
+ /* Will be allocated later. */
+ newp->weights = NULL;
+
newp->file = NULL;
newp->line = 0;
@@ -404,6 +417,223 @@ read_directions (struct linereader *ldfile, struct token *arg,
}
+static struct element_t *
+find_element (struct linereader *ldfile, struct locale_collate_t *collate,
+ const char *str, size_t len, uint32_t *wcstr)
+{
+ struct element_t *result = NULL;
+
+ /* Return the element named by the LEN bytes at STR. The sequence
+ table is consulted first, then the collation symbols, then the
+ collation elements. A name found in none of them gets a new
+ element (carrying WCSTR) which is entered into the sequence table.
+ LDFILE is not used by this function.
+
+ Search for the entries among the collation sequences already
+ defined. */
+ if (find_entry (&collate->seq_table, str, len, (void **) &result) != 0)
+ {
+ /* Nope, not defined yet. So we see whether it is a
+ collation symbol. */
+ void *ptr;
+
+ if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
+ {
+ /* It's a collation symbol. Its element is created on first
+ use, without a wide character string, and remembered in the
+ symbol. */
+ struct symbol_t *sym = (struct symbol_t *) ptr;
+ result = sym->order;
+
+ if (result == NULL)
+ result = sym->order = new_element (collate, str, len, NULL);
+ }
+ else if (find_entry (&collate->elem_table, str, len,
+ (void **) &result) != 0)
+ {
+ /* It's also no collation element. So it is an element defined
+ later. */
+ result = new_element (collate, str, len, wcstr);
+ if (result != NULL)
+ /* Insert it into the sequence table. The return value of
+ insert_entry is not checked here. */
+ insert_entry (&collate->seq_table, str, len, result);
+ }
+ }
+
+ return result;
+}
+
+
+static void
+insert_weights (struct linereader *ldfile, struct element_t *elem,
+ struct charmap_t *charmap, struct repertoire_t *repertoire,
+ struct locale_collate_t *collate)
+{
+ int weight_cnt;
+ struct token *arg;
+
+ /* Record where ELEM is defined, make it the new cursor of the
+ collation order and read its weights from the rest of the current
+ line. ELEM->weights gets one list per rule, i.e. NRULES entries
+ (NRULES is defined outside this function).
+
+ Initialize all the fields.
+
+ NOTE(review): ELEM->last and ELEM->next are set from the old
+ cursor, but the old cursor's own `next' field is not changed to
+ point to ELEM anywhere in this function -- confirm whether that
+ is done elsewhere. */
+ elem->file = ldfile->fname;
+ elem->line = ldfile->lineno;
+ elem->last = collate->cursor;
+ elem->next = collate->cursor ? collate->cursor->next : NULL;
+ elem->weights = (struct element_list_t *)
+ obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
+ memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
+
+ if (collate->current_section->first == NULL)
+ collate->current_section->first = elem;
+ if (collate->current_section->last == collate->cursor)
+ collate->current_section->last = elem;
+
+ collate->cursor = elem;
+
+ weight_cnt = 0;
+
+ arg = lr_token (ldfile, charmap, repertoire);
+ do
+ {
+ if (arg->tok == tok_eof || arg->tok == tok_eol)
+ break;
+
+ if (arg->tok == tok_ignore)
+ {
+ /* The weight for this level has to be ignored. We use the
+ null pointer to indicate this: the list has one slot
+ holding NULL and a count of zero. */
+ elem->weights[weight_cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+ elem->weights[weight_cnt].w[0] = NULL;
+ elem->weights[weight_cnt].cnt = 0;
+ }
+ else if (arg->tok == tok_bsymbol)
+ {
+ struct element_t *val = find_element (ldfile, collate,
+ arg->val.str.startmb,
+ arg->val.str.lenmb,
+ arg->val.str.startwc);
+
+ if (val == NULL)
+ break;
+
+ elem->weights[weight_cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+ elem->weights[weight_cnt].w[0] = val;
+ elem->weights[weight_cnt].cnt = 1;
+ }
+ else if (arg->tok == tok_string)
+ {
+ /* Split the string up in the individual characters and put
+ the element definitions in the list. The element pointers
+ are appended to a growing object in the memory pool which
+ is finished once the whole string is processed. BASE is
+ the value of obstack_base before anything was appended; it
+ is handed to obstack_free when a syntax error is found. */
+ const char *cp = arg->val.str.startmb;
+ int cnt = 0;
+ struct element_t *charelem;
+ void *base = obstack_base (&collate->mempool);
+
+ if (*cp == '\0')
+ {
+ lr_error (ldfile, _("%s: empty weight string not allowed"),
+ "LC_COLLATE");
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ do
+ {
+ if (*cp == '<')
+ {
+ /* Ahh, it's a bsymbol. That's what we want. Scan for
+ the closing '>'; a character following the escape
+ character is skipped over.
+ NOTE(review): the name passed to find_element below
+ starts at the '<' and ends before the '>' -- confirm
+ this matches the form of the keys in the tables. */
+ const char *startp = cp;
+
+ while (*++cp != '>')
+ {
+ if (*cp == ldfile->escape_char)
+ ++cp;
+ if (*cp == '\0')
+ {
+ /* It's a syntax error. Release what was collected
+ for this string before reporting it. */
+ obstack_free (&collate->mempool, base);
+ goto syntax;
+ }
+ }
+
+ charelem = find_element (ldfile, collate, startp,
+ cp - startp, NULL);
+ ++cp;
+ }
+ else
+ {
+ /* People really shouldn't use characters directly in
+ the string. Especially since it's not really clear
+ what this means. We interpret all characters in the
+ string as if they were bsymbols. Otherwise we
+ would have to match back to bsymbols somehow and this
+ is also not what people normally expect. */
+ charelem = find_element (ldfile, collate, cp++, 1, NULL);
+ }
+
+ if (charelem == NULL)
+ {
+ /* We ignore the rest of the line.
+ NOTE(review): this only leaves the inner loop. The
+ pointers collected so far are still stored below and
+ another token is read afterwards -- confirm that
+ this is intended. */
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ /* Add the pointer. */
+ obstack_ptr_grow (&collate->mempool, charelem);
+ ++cnt;
+ }
+ while (*cp != '\0');
+
+ /* Now store the information. */
+ elem->weights[weight_cnt].w = (struct element_t **)
+ obstack_finish (&collate->mempool);
+ elem->weights[weight_cnt].cnt = cnt;
+
+ /* We don't need the string anymore. This assumes the token
+ string was allocated with malloc by the line reader -- TODO
+ confirm. */
+ free (arg->val.str.startmb);
+ }
+ else
+ {
+ syntax:
+ /* It's a syntax error. This point is also reached by the
+ goto in the string scanner above. */
+ lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ arg = lr_token (ldfile, charmap, repertoire);
+ /* This had better be the end of the line or a semicolon. */
+ if (arg->tok == tok_semicolon)
+ /* OK, ignore this and read the next token. */
+ arg = lr_token (ldfile, charmap, repertoire);
+ else if (arg->tok != tok_eof && arg->tok != tok_eol)
+ {
+ /* It's a syntax error. */
+ lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+ }
+ while (++weight_cnt < nrules);
+
+ if (weight_cnt < nrules)
+ {
+ /* This means the rest of the line uses the current element as
+ the weight. The same happens for the remaining levels when the
+ loop above was left early after an error. */
+ do
+ {
+ elem->weights[weight_cnt].w = (struct element_t **)
+ obstack_alloc (&collate->mempool, sizeof (struct element_t *));
+ elem->weights[weight_cnt].w[0] = elem;
+ elem->weights[weight_cnt].cnt = 1;
+ }
+ while (++weight_cnt < nrules);
+ }
+ else
+ {
+ if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
+ {
+ /* Too many rule values.
+ NOTE(review): a pending tok_string is not covered by this
+ test and takes the else branch instead -- confirm. */
+ lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
+ lr_ignore_rest (ldfile, 0);
+ }
+ else
+ lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
+ }
+}
+
+
static void
insert_value (struct linereader *ldfile, struct token *arg,
struct charmap_t *charmap, struct repertoire_t *repertoire,
@@ -413,7 +643,6 @@ insert_value (struct linereader *ldfile, struct token *arg,
struct charseq *seq;
uint32_t wc;
struct element_t *elem = NULL;
- int weight_cnt;
/* First determine the wide character. There must be such a value,
otherwise we ignore it (if it is no collation symbol or element). */
@@ -438,24 +667,36 @@ insert_value (struct linereader *ldfile, struct token *arg,
if (elem == NULL)
elem = sym->order = new_element (collate, arg->val.str.startmb,
+ arg->val.str.lenmb,
arg->val.str.startwc);
}
else if (find_entry (&collate->elem_table, arg->val.str.startmb,
arg->val.str.lenmb, (void **) &elem) != 0)
- /* It's also no collation element. Therefore ignore it. */
- return;
+ {
+ /* It's also no collation element. Therefore ignore it. */
+ lr_ignore_rest (ldfile, 0);
+ return;
+ }
}
else
{
- /* Otherwise the symbols stands for an character. Make sure it is
- not already in the table. */
-
+ /* Otherwise the symbol stands for a character. */
+ if (find_entry (&collate->seq_table, arg->val.str.startmb,
+ arg->val.str.lenmb, (void **) &elem) != 0)
+ {
+ /* We have to allocate an entry. */
+ elem = new_element (collate, arg->val.str.startmb,
+ arg->val.str.lenmb,
+ arg->val.str.startwc);
+
+ /* And add it to the table. */
+ if (insert_entry (&collate->seq_table, arg->val.str.startmb,
+ arg->val.str.lenmb, elem) != 0)
+ /* This cannot happen. */
+ abort ();
+ }
}
- if (elem == NULL)
- /* XXX HACK HACK HACK */
- return;
-
/* Test whether this element is not already in the list. */
if (elem->next != NULL || (collate->cursor != NULL
&& elem->next == collate->cursor))
@@ -463,57 +704,11 @@ insert_value (struct linereader *ldfile, struct token *arg,
lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
arg->val.str.lenmb, arg->val.str.startmb,
elem->file, elem->line);
+ lr_ignore_rest (ldfile, 0);
return;
}
- /* Initialize all the fields. */
- elem->file = ldfile->fname;
- elem->line = ldfile->lineno;
- elem->last = collate->cursor;
- elem->next = collate->cursor ? collate->cursor->next : NULL;
- elem->weights = (struct element_t **)
- obstack_alloc (&collate->mempool, nrules * sizeof (struct element_t *));
- memset (elem->weights, '\0', nrules * sizeof (struct element_t *));
-
- if (collate->current_section->first == NULL)
- collate->current_section->first = elem;
- if (collate->current_section->last == collate->cursor)
- collate->current_section->last = elem;
-
- collate->cursor = elem;
-
- /* Now read the rest of the line. */
- ldfile->return_widestr = 1;
-
- weight_cnt = 0;
- do
- {
- arg = lr_token (ldfile, charmap, repertoire);
-
- if (arg->tok == tok_eof || arg->tok == tok_eol)
- {
- /* This means the rest of the line uses the current element
- as the weight. */
- do
- elem->weights[weight_cnt] = elem;
- while (++weight_cnt < nrules);
-
- return;
- }
-
- if (arg->tok == tok_ignore)
- {
- /* The weight for this level has to be ignored. We use the
- null pointer to indicate this. */
- }
- else if (arg->tok == tok_bsymbol)
- {
-
- }
- }
- while (++weight_cnt < nrules);
-
- lr_ignore_rest (ldfile, weight_cnt == nrules);
+ insert_weights (ldfile, elem, charmap, repertoire, collate);
}
@@ -749,6 +944,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
symbol, symbol_len,
new_element (collate,
arg->val.str.startmb,
+ arg->val.str.lenmb,
arg->val.str.startwc))
< 0)
lr_error (ldfile, _("\
@@ -994,6 +1190,9 @@ error while adding equivalent collating symbol"));
/* Now read the direction names. */
read_directions (ldfile, arg, charmap, repertoire, collate);
+
+ /* From now on we need the strings untranslated. */
+ ldfile->translate_strings = 0;
break;
case tok_order_end:
@@ -1099,7 +1298,21 @@ error while adding equivalent collating symbol"));
if (state != 1)
goto err_label;
- /* XXX handle UNDEFINED weight */
+
+ /* See whether UNDEFINED already appeared somewhere. */
+ if (collate->undefined.next != NULL
+ || (collate->cursor != NULL
+ && collate->undefined.next == collate->cursor))
+ {
+ lr_error (ldfile, _("order for `%.*s' already defined at %s:%Z"),
+ 9, "UNDEFINED", collate->undefined.file,
+ collate->undefined.line);
+ lr_ignore_rest (ldfile, 0);
+ }
+ else
+ /* Parse the weights. */
+ insert_weights (ldfile, &collate->undefined, charmap,
+ repertoire, collate);
break;
case tok_ellipsis3:
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index 5668867dd4..cfa1896974 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,7 @@
+1999-12-10 Ulrich Drepper <drepper@cygnus.com>
+
+ * locales/de_DE: Correct syntax of multi-character weights.
+
1999-12-08 Ulrich Drepper <drepper@cygnus.com>
* tests/test6.c: New file.
diff --git a/localedata/locales/de_DE b/localedata/locales/de_DE
index 26295d77de..10f302cad2 100644
--- a/localedata/locales/de_DE
+++ b/localedata/locales/de_DE
@@ -1937,14 +1937,14 @@ UNDEFINED IGNORE;IGNORE;IGNORE
<8a> <8>;<8a>;IGNORE;IGNORE
<9a> <9>;<9a>;IGNORE;IGNORE
-<lM-> <l+><aM>;<l+><aM>;<lM-><lM->;IGNORE
-<lM.> <l+><aM>;<l+><aM.>;<lM.><lM.>;IGNORE
-<lH-> <l+><aH>;<l+><aH>;<lH-><lH->;IGNORE
-<lH.> <l+><aH>;<l+><aH.>;<lH.><lH.>;IGNORE
-<lh-> <l+><ah>;<l+><ah>;<lh-><lh->;IGNORE
-<lh.> <l+><ah>;<l+><ah.>;<lh.><lh.>;IGNORE
-<la-> <l+><a+>;<l+><a+->;<la-><la->;IGNORE
-<la.> <l+><a+>;<l+><a+.>;<la.><la.>;IGNORE
+<lM-> "<l+><aM>";"<l+><aM>";"<lM-><lM->";IGNORE
+<lM.> "<l+><aM>";"<l+><aM.>";"<lM.><lM.>";IGNORE
+<lH-> "<l+><aH>";"<l+><aH>";"<lH-><lH->";IGNORE
+<lH.> "<l+><aH>";"<l+><aH.>";"<lH.><lH.>";IGNORE
+<lh-> "<l+><ah>";"<l+><ah>";"<lh-><lh->";IGNORE
+<lh.> "<l+><ah>";"<l+><ah.>";"<lh.><lh.>";IGNORE
+<la-> "<l+><a+>";"<l+><a+->";"<la-><la->";IGNORE
+<la.> "<l+><a+>";"<l+><a+.>";"<la.><la.>";IGNORE
% katakana/hiragana sorting
% base is katakana, as this is present in most charsets