aboutsummaryrefslogtreecommitdiff
path: root/locale/programs/ld-collate.c
diff options
context:
space:
mode:
Diffstat (limited to 'locale/programs/ld-collate.c')
-rw-r--r--locale/programs/ld-collate.c222
1 files changed, 172 insertions, 50 deletions
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 3260c43a9e..205cf966f2 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -47,6 +47,7 @@ struct element_t;
/* Data type for list of strings. */
struct section_list
{
+ struct section_list *def_next;
struct section_list *next;
/* Name of the section. */
const char *name;
@@ -144,6 +145,8 @@ struct locale_collate_t
int cur_weight_max;
/* List of known scripts. */
+ struct section_list *known_sections;
+ /* List of used sections. */
struct section_list *sections;
/* Current section using definition. */
struct section_list *current_section;
@@ -151,6 +154,9 @@ struct locale_collate_t
struct section_list unnamed_section;
/* To make handling of errors easier we have another section. */
struct section_list error_section;
+ /* Sometimes we are defining the values for collating symbols before
+ the first actual section. */
+ struct section_list symbol_section;
/* Start of the order list. */
struct element_t *start;
@@ -562,7 +568,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
static struct element_t *
find_element (struct linereader *ldfile, struct locale_collate_t *collate,
- const char *str, size_t len, uint32_t *wcstr)
+ const char *str, size_t len)
{
struct element_t *result = NULL;
@@ -668,13 +674,26 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
elem->weights[weight_cnt].w[0] = NULL;
elem->weights[weight_cnt].cnt = 1;
}
- else if (arg->tok == tok_bsymbol)
+ else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
{
- struct element_t *val = find_element (ldfile, collate,
- arg->val.str.startmb,
- arg->val.str.lenmb,
- arg->val.str.startwc);
+ char ucs4str[10];
+ struct element_t *val;
+ char *symstr;
+ size_t symlen;
+ if (arg->tok == tok_bsymbol)
+ {
+ symstr = arg->val.str.startmb;
+ symlen = arg->val.str.lenmb;
+ }
+ else
+ {
+ snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
+ symstr = ucs4str;
+ symlen = 9;
+ }
+
+ val = find_element (ldfile, collate, symstr, symlen);
if (val == NULL)
break;
@@ -720,7 +739,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
}
charelem = find_element (ldfile, collate, startp,
- cp - startp, NULL);
+ cp - startp);
++cp;
}
else
@@ -731,7 +750,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
string as if that would be bsymbols. Otherwise we
would have to match back to bsymbols somehow and this
is normally not what people normally expect. */
- charelem = find_element (ldfile, collate, cp++, 1, NULL);
+ charelem = find_element (ldfile, collate, cp++, 1);
}
if (charelem == NULL)
@@ -1349,7 +1368,7 @@ static void
collate_startup (struct linereader *ldfile, struct localedef_t *locale,
struct localedef_t *copy_locale, int ignore_content)
{
- if (!ignore_content)
+ if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
{
struct locale_collate_t *collate;
@@ -1432,8 +1451,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
or in none. */
for (i = 0; i < nrules; ++i)
for (sect = collate->sections; sect != NULL; sect = sect->next)
- if ((sect->rules[i] & sort_position)
- != (collate->sections->rules[i] & sort_position))
+ if (sect->rules != NULL
+ && ((sect->rules[i] & sort_position)
+ != (collate->sections->rules[i] & sort_position)))
{
error (0, 0, _("\
%s: `position' must be used for a specific level in all sections or none"),
@@ -1771,7 +1791,10 @@ Computing table size for collation table might take a while..."),
{
if (need_undefined)
{
- error (0, 0, _("no definition of `UNDEFINED'"));
+ /* This seems not to be enforced by recent standards. Don't
+ emit an error, simply append UNDEFINED at the end. */
+ if (0)
+ error (0, 0, _("no definition of `UNDEFINED'"));
/* Add UNDEFINED at the end. */
collate->undefined.mborder =
@@ -1793,6 +1816,8 @@ Computing table size for collation table might take a while..."),
ruleset the same index. Since there are never many section we can
use an O(n^2) algorithm here. */
sect = collate->sections;
+ while (sect != NULL && sect->rules == NULL)
+ sect = sect->next;
assert (sect != NULL);
ruleidx = 0;
do
@@ -1800,7 +1825,8 @@ Computing table size for collation table might take a while..."),
struct section_list *osect = collate->sections;
while (osect != sect)
- if (memcmp (osect->rules, sect->rules, nrules) == 0)
+ if (osect->rules != NULL
+ && memcmp (osect->rules, sect->rules, nrules) == 0)
break;
else
osect = osect->next;
@@ -1811,7 +1837,9 @@ Computing table size for collation table might take a while..."),
sect->ruleidx = osect->ruleidx;
/* Next section. */
- sect = sect->next;
+ do
+ sect = sect->next;
+ while (sect != NULL && sect->rules == NULL);
}
while (sect != NULL);
/* We are currently not prepared for more than 256 rulesets. But this
@@ -1993,7 +2021,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
/* Prepare the ruleset table. */
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
- if (sect->ruleidx == i)
+ if (sect->rules != NULL && sect->ruleidx == i)
{
int j;
@@ -2670,7 +2698,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
/* Get the locale definition. */
copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
- repertoire_name, charmap);
+ repertoire_name, charmap, NULL);
if ((copy_locale->avail & COLLATE_LOCALE) == 0)
{
/* Not yet loaded. So do it now. */
@@ -2708,6 +2736,19 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
switch (nowtok)
{
+ case tok_copy:
+ /* Allow copying other locales. */
+ now = lr_token (ldfile, charmap, NULL);
+ if (now->tok != tok_string)
+ goto err_label;
+
+ if (! ignore_content)
+ load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
+ charmap, result);
+
+ lr_ignore_rest (ldfile, 1);
+ break;
+
case tok_coll_weight_max:
/* Ignore the rest of the line if we don't need the input of
this line. */
@@ -2751,8 +2792,11 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
/* Check whether this section is already known. */
struct section_list *known = collate->sections;
while (known != NULL)
- if (strcmp (known->name, arg->val.str.startmb) == 0)
- break;
+ {
+ if (strcmp (known->name, arg->val.str.startmb) == 0)
+ break;
+ known = known->next;
+ }
if (known != NULL)
{
@@ -2822,15 +2866,12 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
repertoire, symbol, symbol_len))
goto col_elem_free;
- if (insert_entry (&collate->elem_table,
- symbol, symbol_len,
- new_element (collate,
- arg->val.str.startmb,
- arg->val.str.lenmb - 1,
- arg->val.str.startwc,
- symbol, symbol_len, 0)) < 0)
- lr_error (ldfile, _("\
-error while adding collating element"));
+ insert_entry (&collate->elem_table, symbol, symbol_len,
+ new_element (collate,
+ arg->val.str.startmb,
+ arg->val.str.lenmb - 1,
+ arg->val.str.startwc,
+ symbol, symbol_len, 0));
}
else
{
@@ -2909,11 +2950,8 @@ error while adding collating element"));
repertoire, symbol, symbol_len))
goto col_sym_free;
- if (insert_entry (&collate->sym_table,
- symbol, symbol_len,
- new_symbol (collate)) < 0)
- lr_error (ldfile, _("\
-error while adding collating symbol"));
+ insert_entry (&collate->sym_table, symbol, symbol_len,
+ new_symbol (collate));
}
else if (symbol_len != endsymbol_len)
{
@@ -2972,11 +3010,8 @@ error while adding collating symbol"));
repertoire, symbuf, symbol_len))
goto col_sym_free;
- if (insert_entry (&collate->sym_table,
- symbuf, symbol_len,
- new_symbol (collate)) < 0)
- lr_error (ldfile, _("\
-error while adding collating symbol"));
+ insert_entry (&collate->sym_table, symbuf,
+ symbol_len, new_symbol (collate));
/* Increment the counter. */
++from;
@@ -3074,6 +3109,44 @@ error while adding equivalent collating symbol"));
lr_ignore_rest (ldfile, 1);
break;
+ case tok_script:
+ /* We get told about the scripts we know. */
+ arg = lr_token (ldfile, charmap, repertoire);
+ if (arg->tok != tok_bsymbol)
+ goto err_label;
+ else
+ {
+ struct section_list *runp = collate->known_sections;
+ char *name;
+
+ while (runp != NULL)
+ if (strncmp (runp->name, arg->val.str.startmb,
+ arg->val.str.lenmb) == 0
+ && runp->name[arg->val.str.lenmb] == '\0')
+ break;
+ else
+ runp = runp->def_next;
+
+ if (runp != NULL)
+ {
+ lr_error (ldfile, _("duplicate definition of script `%s'"),
+ runp->name);
+ lr_ignore_rest (ldfile, 0);
+ break;
+ }
+
+ runp = (struct section_list *) xcalloc (1, sizeof (*runp));
+ name = strncpy (xmalloc (arg->val.str.lenmb + 1),
+ arg->val.str.startmb, arg->val.str.lenmb);
+ name[arg->val.str.lenmb] = '\0';
+ runp->name = name;
+
+ runp->def_next = collate->known_sections;
+ collate->known_sections = runp;
+ }
+ lr_ignore_rest (ldfile, 1);
+ break;
+
case tok_order_start:
/* Ignore the rest of the line if we don't need the input of
this line. */
@@ -3094,10 +3167,13 @@ error while adding equivalent collating symbol"));
if (arg->tok == tok_bsymbol)
{
/* This better should be a section name. */
- struct section_list *sp = collate->sections;
+ struct section_list *sp = collate->known_sections;
while (sp != NULL
- && strcmp (sp->name, arg->val.str.startmb) != 0)
- sp = sp->next;
+ && (sp->name == NULL
+ || strncmp (sp->name, arg->val.str.startmb,
+ arg->val.str.lenmb) != 0
+ || sp->name[arg->val.str.lenmb] != '\0'))
+ sp = sp->def_next;
if (sp == NULL)
{
@@ -3109,15 +3185,21 @@ error while adding equivalent collating symbol"));
if (collate->error_section.first == NULL)
{
- collate->error_section.next = collate->sections;
- collate->sections = &collate->error_section;
+ if (collate->sections == NULL)
+ collate->sections = &collate->error_section;
+ else
+ {
+ sp = collate->sections;
+ while (sp->next != NULL)
+ sp = sp->next;
+
+ collate->error_section.next = NULL;
+ sp->next = &collate->error_section;
+ }
}
}
else
{
- /* Remember this section. */
- collate->current_section = sp;
-
/* One should not be allowed to open the same
section twice. */
if (sp->first != NULL)
@@ -3126,8 +3208,13 @@ error while adding equivalent collating symbol"));
"LC_COLLATE", sp->name);
else
{
- sp->next = collate->sections;
- collate->sections = sp;
+ if (collate->current_section == NULL)
+ collate->current_section = sp;
+ else
+ {
+ sp->next = collate->current_section->next;
+ collate->current_section->next = sp;
+ }
}
/* Next should come the end of the line or a semicolon. */
@@ -3381,10 +3468,10 @@ error while adding equivalent collating symbol"));
break;
}
- if (state != 1 && state != 3 && state != 5)
+ if (state != 0 && state != 1 && state != 3 && state != 5)
goto err_label;
- if (state == 5 && nowtok == tok_ucs4)
+ if ((state == 0 || state == 5) && nowtok == tok_ucs4)
goto err_label;
if (nowtok == tok_ucs4)
@@ -3399,7 +3486,41 @@ error while adding equivalent collating symbol"));
symlen = arg->val.str.lenmb;
}
- if (state == 3)
+ if (state == 0)
+ {
+ /* We are outside an `order_start' region. This means
+ we must only accept definitions of values for
+ collation symbols since these are purely abstract
+ values and don't need dorections associated. */
+ struct element_t *seqp;
+
+ if (find_entry (&collate->seq_table, symstr, symlen,
+ (void **) &seqp) == 0)
+ {
+ /* It's already defined. First check whether this
+ is really a collating symbol. */
+ if (seqp->is_character)
+ goto err_label;
+
+ goto move_entry;
+ }
+ else
+ {
+ void *result;
+
+ if (find_entry (&collate->sym_table, symstr, symlen,
+ &result) != 0)
+ /* No collating symbol, it's an error. */
+ goto err_label;
+
+ /* Maybe this is the first time we define a symbol
+ value and it is before the first actual section. */
+ if (collate->sections == NULL)
+ collate->sections = collate->current_section =
+ &collate->symbol_section;
+ }
+ }
+ else if (state == 3)
{
/* It is possible that we already have this collation sequence.
In this case we move the entry. */
@@ -3416,6 +3537,7 @@ error while adding equivalent collating symbol"));
if (find_entry (&collate->seq_table, symstr, symlen,
(void **) &seqp) == 0)
{
+ move_entry:
/* Remove the entry from the old position. */
if (seqp->last == NULL)
collate->start = seqp->next;