about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 1999-12-31 22:21:25 +0000
committer Ulrich Drepper <drepper@redhat.com> 1999-12-31 22:21:25 +0000
commit ac8295d23b59e34d2f7c5757ea71336eab2c9e6e (patch)
tree 7399464a02b52e3cd401338b2f5631733f209458
parent 1c5d461740065effc3c0a1d84fd88842a608b7f7 (diff)
download glibc-ac8295d23b59e34d2f7c5757ea71336eab2c9e6e.tar
glibc-ac8295d23b59e34d2f7c5757ea71336eab2c9e6e.tar.gz
glibc-ac8295d23b59e34d2f7c5757ea71336eab2c9e6e.tar.bz2
glibc-ac8295d23b59e34d2f7c5757ea71336eab2c9e6e.zip
(collate_output): Update.
* locale/programs/ld-collate.c (collate_output): Emit correct information for collation elements. Don't write over end of array idx.
* posix/regex.c: Handle also collation elements at end of range.
* posix/PTESTS: Fix a few typos.
-rw-r--r-- ChangeLog 7
-rw-r--r-- locale/programs/ld-collate.c 45
-rw-r--r-- posix/PTESTS 12
-rw-r--r-- posix/ptestcases.h 12
-rw-r--r-- posix/regex.c 91
5 files changed, 86 insertions, 81 deletions
diff --git a/ChangeLog b/ChangeLog
index 8c10f3a301..0192430ee2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
1999-12-31 Ulrich Drepper <drepper@cygnus.com>
+ * locale/programs/ld-collate.c (collate_output): Emit correct
+ information for collation elements.
+ Don't write over end of array idx.
+ * posix/regex.c: Handle also collation elements at end of range.
+
+ * posix/PTESTS: Fix a few typos.
+
* posix/bits/posix2_lim.h: Remove _POSIX2_EQUIV_CLASS_MAX. I have
no idea where this came from.
* sysdeps/posix/sysconf.c: Remove _POSIX2_EQUIV_CLASS_MAX
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 8eb47d7f8e..2cbea388b2 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -91,8 +91,6 @@ struct element_t
unsigned int used_in_level;
struct element_list_t *weights;
- /* Index in the `weight' table in the output file for the character. */
- int32_t weights_idx;
/* Nonzero if this is a real character definition. */
int is_character;
@@ -301,7 +299,6 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
/* Will be allocated later. */
newp->weights = NULL;
- newp->weights_idx = 0;
newp->file = NULL;
newp->line = 0;
@@ -1809,9 +1806,6 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
obstack_grow (pool, buf, len);
}
- /* Remember the index. */
- elem->weights_idx = retval;
-
return retval | ((elem->section->ruleidx & 0x7f) << 24);
}
@@ -1899,11 +1893,26 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
/* If we have no LC_COLLATE data emit only the number of rules as zero. */
if (collate == NULL)
{
+ int32_t dummy = 0;
+
while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
{
- iov[2 + cnt].iov_base = (char *) "";
- iov[2 + cnt].iov_len = 0;
- idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+ /* The words have to be handled specially. */
+ if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)
+ || cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)
+ || cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
+ {
+ iov[2 + cnt].iov_base = &dummy;
+ iov[2 + cnt].iov_len = sizeof (int32_t);
+ }
+ else
+ {
+ iov[2 + cnt].iov_base = (char *) "";
+ iov[2 + cnt].iov_len = 0;
+ }
+
+ if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
+ idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
++cnt;
}
@@ -2453,23 +2462,20 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
elem_table[idx * 2] = hash;
elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
- /* Now add the index into the weights table. We know the
- address is always 32bit aligned. */
- if (sizeof (int) == sizeof (int32_t))
- obstack_int_grow (&extrapool, runp->weights_idx);
- else
- obstack_grow (&extrapool, &runp->weights_idx,
- sizeof (int32_t));
-
/* The the string itself including length. */
obstack_1grow (&extrapool, namelen);
obstack_grow (&extrapool, runp->name, namelen);
+ /* And the multibyte representation. */
+ obstack_1grow (&extrapool, runp->nmbs);
+ obstack_grow (&extrapool, runp->mbs, runp->nmbs);
+
/* And align again to 32 bits. */
- if ((1 + namelen) % sizeof (int32_t) != 0)
+ if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
obstack_grow (&extrapool, "\0\0",
(sizeof (int32_t)
- - (1 + namelen) % sizeof (int32_t)));
+ - ((1 + namelen + 1 + runp->nmbs)
+ % sizeof (int32_t))));
}
}
@@ -2492,7 +2498,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
iov[2 + cnt].iov_base = obstack_finish (&extrapool);
- idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
++cnt;
diff --git a/posix/PTESTS b/posix/PTESTS
index 3580c81a03..b017f5b3f2 100644
--- a/posix/PTESTS
+++ b/posix/PTESTS
@@ -115,7 +115,7 @@
3¦3¦[][.-.]-0]¦ab0-]¦
3¦3¦[A-[.].]c]¦ab]!¦
# GA122
--2¦-2¦[[.ch]]¦abc¦
+-2¦-2¦[[.ch.]]¦abc¦
-2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦
# GA125
2¦2¦[[=a=]b]¦Abc¦
@@ -163,12 +163,12 @@
2¦6¦bc[d-w]xy¦abchxyz¦
# GA129
1¦1¦[a-cd-f]¦dbccde¦
--1¦-1¦[a-ce-f¦dBCCdE¦
+-1¦-1¦[a-ce-f]¦dBCCdE¦
2¦4¦b[n-zA-M]Y¦absY9Z¦
2¦4¦b[n-zA-M]Y¦abGY9Z¦
# GA130
3¦3¦[-xy]¦ac-¦
-2¦4¦[c[-xy]D¦ac-D+¦
+2¦4¦c[-xy]D¦ac-D+¦
2¦2¦[--/]¦a.b¦
2¦4¦c[--/]D¦ac.D+b¦
2¦2¦[^-ac]¦abcde-¦
@@ -189,7 +189,7 @@
3¦4¦[a-c][e-f]¦acbedf¦
4¦8¦abc*XYZ¦890abXYZ#*¦
4¦9¦abc*XYZ¦890abcXYZ#*¦
-4¦15¦abc*XYZ¦890abccccccccXYZ#*¦
+4¦15¦abc*XYZ¦890abcccccccXYZ#*¦
-1¦-1¦abc*XYZ¦890abc*XYZ#*¦
# GA132
2¦4¦\(*bc\)¦a*bc¦
@@ -267,7 +267,7 @@
1¦1¦^a¦abc¦
-1¦-1¦^b¦abc¦
-1¦-1¦^[a-zA-Z]¦99Nine¦
-1¦4¦^[a-zA-Z]¦Nine99¦
+1¦4¦^[a-zA-Z]*¦Nine99¦
# GA145(1)
1¦2¦\(^a\)\1¦aabc¦
-1¦-1¦\(^a\)\1¦^a^abc¦
@@ -284,7 +284,7 @@
3¦3¦a$¦cba¦
-1¦-1¦a$¦abc¦
5¦7¦[a-z]*$¦99ZZxyz¦
--1¦-1¦[a-z]*$¦99ZZxyz99¦
+9¦9¦[a-z]*$¦99ZZxyz99¦
3¦3¦$$¦ab$¦
-1¦-1¦$$¦$ab¦
3¦3¦\$$¦ab$¦
diff --git a/posix/ptestcases.h b/posix/ptestcases.h
index d6e099c82b..87f584d2e3 100644
--- a/posix/ptestcases.h
+++ b/posix/ptestcases.h
@@ -110,7 +110,7 @@
{ 3, 3, "[][.-.]-0]", "ab0-]", },
{ 3, 3, "[A-[.].]c]", "ab]!", },
{ 0, 0, "GA122", NULL, },
- { -2, -2, "[[.ch]]", "abc", },
+ { -2, -2, "[[.ch.]]", "abc", },
{ -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", },
{ 0, 0, "GA125", NULL, },
{ 2, 2, "[[=a=]b]", "Abc", },
@@ -158,12 +158,12 @@
{ 2, 6, "bc[d-w]xy", "abchxyz", },
{ 0, 0, "GA129", NULL, },
{ 1, 1, "[a-cd-f]", "dbccde", },
- { -1, -1, "[a-ce-f", "dBCCdE", },
+ { -1, -1, "[a-ce-f]", "dBCCdE", },
{ 2, 4, "b[n-zA-M]Y", "absY9Z", },
{ 2, 4, "b[n-zA-M]Y", "abGY9Z", },
{ 0, 0, "GA130", NULL, },
{ 3, 3, "[-xy]", "ac-", },
- { 2, 4, "[c[-xy]D", "ac-D+", },
+ { 2, 4, "c[-xy]D", "ac-D+", },
{ 2, 2, "[--/]", "a.b", },
{ 2, 4, "c[--/]D", "ac.D+b", },
{ 2, 2, "[^-ac]", "abcde-", },
@@ -184,7 +184,7 @@
{ 3, 4, "[a-c][e-f]", "acbedf", },
{ 4, 8, "abc*XYZ", "890abXYZ#*", },
{ 4, 9, "abc*XYZ", "890abcXYZ#*", },
- { 4, 15, "abc*XYZ", "890abccccccccXYZ#*", },
+ { 4, 15, "abc*XYZ", "890abcccccccXYZ#*", },
{ -1, -1, "abc*XYZ", "890abc*XYZ#*", },
{ 0, 0, "GA132", NULL, },
{ 2, 4, "\\(*bc\\)", "a*bc", },
@@ -262,7 +262,7 @@
{ 1, 1, "^a", "abc", },
{ -1, -1, "^b", "abc", },
{ -1, -1, "^[a-zA-Z]", "99Nine", },
- { 1, 4, "^[a-zA-Z]", "Nine99", },
+ { 1, 4, "^[a-zA-Z]*", "Nine99", },
{ 0, 0, "GA145(1)", NULL, },
{ 1, 2, "\\(^a\\)\\1", "aabc", },
{ -1, -1, "\\(^a\\)\\1", "^a^abc", },
@@ -274,7 +274,7 @@
{ 3, 3, "a$", "cba", },
{ -1, -1, "a$", "abc", },
{ 5, 7, "[a-z]*$", "99ZZxyz", },
- { -1, -1, "[a-z]*$", "99ZZxyz99", },
+ { 9, 9, "[a-z]*$", "99ZZxyz99", },
{ 3, 3, "$$", "ab$", },
{ -1, -1, "$$", "$ab", },
{ 3, 3, "\\$$", "ab$", },
diff --git a/posix/regex.c b/posix/regex.c
index a59f5d4a71..d036a7dd3a 100644
--- a/posix/regex.c
+++ b/posix/regex.c
@@ -1570,7 +1570,8 @@ static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
reg_syntax_t syntax));
-static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
+static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
+ const char **p_ptr,
const char *pend,
char *translate,
reg_syntax_t syntax,
@@ -2174,6 +2175,7 @@ regex_compile (pattern, size, syntax, bufp)
case '[':
{
boolean had_char_class = false;
+ unsigned int range_start = 0xffffffff;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2217,6 +2219,7 @@ regex_compile (pattern, size, syntax, bufp)
PATFETCH (c1);
SET_LIST_BIT (c1);
+ range_start = c1;
continue;
}
@@ -2241,8 +2244,10 @@ regex_compile (pattern, size, syntax, bufp)
&& *p != ']')
{
reg_errcode_t ret
- = compile_range (&p, pend, translate, syntax, b);
+ = compile_range (range_start, &p, pend, translate,
+ syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
}
else if (p[0] == '-' && p[1] != ']')
@@ -2252,8 +2257,9 @@ regex_compile (pattern, size, syntax, bufp)
/* Move past the `-'. */
PATFETCH (c1);
- ret = compile_range (&p, pend, translate, syntax, b);
+ ret = compile_range (c, &p, pend, translate, syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+ range_start = 0xffffffff;
}
/* See if we're at the beginning of a possible character
@@ -2376,6 +2382,7 @@ regex_compile (pattern, size, syntax, bufp)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
+ range_start = ':';
had_char_class = false;
}
}
@@ -2503,6 +2510,16 @@ regex_compile (pattern, size, syntax, bufp)
#endif
had_char_class = true;
}
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT ('=');
+ range_start = '=';
+ had_char_class = false;
+ }
}
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
{
@@ -2553,6 +2570,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Set the bit for the character. */
SET_LIST_BIT (str[0]);
+ range_start = ((const unsigned char *) str)[0];
}
#ifdef _LIBC
else
@@ -2561,9 +2579,7 @@ regex_compile (pattern, size, syntax, bufp)
those known to the collate implementation.
First find out whether the bytes in `str' are
actually from exactly one character. */
- const unsigned char *weights;
int32_t table_size;
- const int32_t *table;
const int32_t *symb_table;
const unsigned char *extra;
int32_t idx;
@@ -2574,10 +2590,6 @@ regex_compile (pattern, size, syntax, bufp)
int32_t hash;
int ch;
- table = (const int32_t *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- weights = (const unsigned char *)
- _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
table_size =
_NL_CURRENT_WORD (LC_COLLATE,
_NL_COLLATE_SYMB_HASH_SIZEMB);
@@ -2598,17 +2610,15 @@ regex_compile (pattern, size, syntax, bufp)
{
/* First compare the hashing value. */
if (symb_table[2 * elem] == hash
- && (c1 == extra[symb_table[2 * elem + 1]
- + sizeof (int32_t)])
+ && c1 == extra[symb_table[2 * elem + 1]]
&& memcmp (str,
&extra[symb_table[2 * elem + 1]
- + sizeof (int32_t) + 1],
+ + 1],
c1) == 0)
{
/* Yep, this is the entry. */
- idx = *((int32_t *)
- (extra
- + symb_table[2 * elem + 1]));
+ idx = symb_table[2 * elem + 1];
+ idx += 1 + extra[idx];
break;
}
@@ -2624,40 +2634,21 @@ regex_compile (pattern, size, syntax, bufp)
class. */
PATFETCH (c);
- /* Now we have to go throught the whole table
- and find all characters which have the same
- first level weight.
+ /* Now add the multibyte character(s) we found
+ to the accepted list.
XXX Note that this is not entirely correct.
we would have to match multibyte sequences
but this is not possible with the current
- implementation. */
- for (ch = 1; ch < 256; ++ch)
- /* XXX This test would have to be changed if we
- would allow matching multibyte sequences. */
- if (table[ch] > 0)
- {
- int32_t idx2 = table[ch];
- size_t len = weights[idx2];
-
- /* Test whether the lenghts match. */
- if (weights[idx] == len)
- {
- /* They do. New compare the bytes of
- the weight. */
- size_t cnt = 0;
-
- while (cnt < len
- && (weights[idx + 1 + cnt]
- == weights[idx2 + 1 + cnt]))
- ++len;
-
- if (cnt == len)
- /* They match. Mark the character as
- acceptable. */
- SET_LIST_BIT (ch);
- }
- }
+ implementation. Also, we have to match
+ collating symbols, which expand to more than
+ one byte, as a whole and not allow the
+ individual bytes. */
+ c1 = extra[idx++];
+ if (c1 == 1)
+ range_start = extra[idx];
+ while (c1-- > 0)
+ SET_LIST_BIT (extra[idx++]);
}
#endif
had_char_class = false;
@@ -2668,7 +2659,8 @@ regex_compile (pattern, size, syntax, bufp)
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
- SET_LIST_BIT ('=');
+ SET_LIST_BIT ('.');
+ range_start = '.';
had_char_class = false;
}
}
@@ -2676,6 +2668,7 @@ regex_compile (pattern, size, syntax, bufp)
{
had_char_class = false;
SET_LIST_BIT (c);
+ range_start = c;
}
}
@@ -3425,7 +3418,8 @@ group_in_compile_stack (compile_stack, regnum)
`regex_compile' itself. */
static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
+compile_range (range_start, p_ptr, pend, translate, syntax, b)
+ unsigned int range_start;
const char **p_ptr, *pend;
RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax;
@@ -3434,7 +3428,7 @@ compile_range (p_ptr, pend, translate, syntax, b)
unsigned this_char;
const char *p = *p_ptr;
- unsigned int range_start, range_end;
+ unsigned int range_end;
if (p == pend)
return REG_ERANGE;
@@ -3447,7 +3441,6 @@ compile_range (p_ptr, pend, translate, syntax, b)
We also want to fetch the endpoints without translating them; the
appropriate translation is done in the bit-setting loop below. */
/* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
- range_start = ((const unsigned char *) p)[-2];
range_end = ((const unsigned char *) p)[0];
/* Have to increment the pointer into the pattern string, so the