diff options
author | Ulrich Drepper <drepper@redhat.com> | 2000-06-06 03:16:30 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2000-06-06 03:16:30 +0000 |
commit | 85830c4c4688b30d3d76111aa9a26745c7b141d6 (patch) | |
tree | 8b2b99b4e538983ac760fb4faa94ce0a42121fa2 /iconv | |
parent | b0f1ca68448e9049cb6d266f327ca36523f3f675 (diff) | |
download | glibc-85830c4c4688b30d3d76111aa9a26745c7b141d6.tar glibc-85830c4c4688b30d3d76111aa9a26745c7b141d6.tar.gz glibc-85830c4c4688b30d3d76111aa9a26745c7b141d6.tar.bz2 glibc-85830c4c4688b30d3d76111aa9a26745c7b141d6.zip |
Update.
* iconv/gconv.h (__GCONV_IS_LAST, __GCONV_IGNORE_ERRORS): Define.
(struct __gconv_step_data): Rename __is_last to __flags.
* iconv/gconv_close.c: Change all uses of __is_last.
* iconv/skeleton.c: Likewise.
* iconvdata/iso-2022-cn.c: Likewise.
* iconvdata/iso-2022-jp.c: Likewise.
* iconvdata/iso-2022-kr.c: Likewise.
* iconv/gconv_open.c: Likewise. Avoid unneeded initializations.
Recognize IGNORE error handling, set flag, and remove from name.
* iconv/loop.c (ignore_errors_p): Define.
Add flags parameter to both functions.
* iconv/skeleton.c: Pass flags to all conversion functions.
* iconv/gconv_simple.c: Add flags parameter to all functions.
Don't return error for invalid input if ignore flag is set.
(ucs4_internal_loop_single): Add missing pointer increment.
(internal_ucs4le_loop_single): Likewise.
* iconv/iconv_prog.c: Implement handling of -c parameter.
* iconvdata/8bit-gap.c: Don't return error for invalid input if
ignore flag is set.
* iconvdata/8bit-generic.c: Likewise.
* iconvdata/ansi_x3.110.c: Likewise.
* iconvdata/big5.c: Likewise.
* iconvdata/big5hkscs.c: Likewise.
* iconvdata/euc-cn.c: Likewise.
* iconvdata/euc-jp.c: Likewise.
* iconvdata/euc-kr.c: Likewise.
* iconvdata/gbgbk.c: Likewise.
* iconvdata/gbk.c: Likewise.
* iconvdata/iso-2022-cn.c: Likewise.
* iconvdata/iso-2022-jp.c: Likewise.
* iconvdata/iso-2022-kr.c: Likewise.
* iconvdata/iso646.c: Likewise.
* iconvdata/iso8859-1.c: Likewise.
* iconvdata/iso_6937-2.c: Likewise.
* iconvdata/iso_6937.c: Likewise.
* iconvdata/johab.c: Likewise.
* iconvdata/sjis.c: Likewise.
* iconvdata/t.61.c: Likewise.
* iconvdata/uhc.c: Likewise.
* iconvdata/unicode.c: Likewise.
* iconvdata/utf-16.c: Likewise.
* libio/fileops.c: Likewise.
* libio/iofwide.c: Likewise.
* wcsmbs/btowc.c: Likewise.
* wcsmbs/mbrtowc.c: Likewise.
* wcsmbs/mbsnrtowcs.c: Likewise.
* wcsmbs/mbsrtowcs.c: Likewise.
* wcsmbs/wcrtomb.c: Likewise.
* wcsmbs/wcsnrtombs.c: Likewise.
* wcsmbs/wcsrtombs.c: Likewise.
* wcsmbs/wctob.c: Likewise.
* iconvdata/ksc5601.h (ksc5601_to_ucs4): Undo *s change in all cases of
Diffstat (limited to 'iconv')
-rw-r--r-- | iconv/gconv.h | 10 | ||||
-rw-r--r-- | iconv/gconv_close.c | 6 | ||||
-rw-r--r-- | iconv/gconv_open.c | 28 | ||||
-rw-r--r-- | iconv/gconv_simple.c | 244 | ||||
-rw-r--r-- | iconv/iconv_prog.c | 48 | ||||
-rw-r--r-- | iconv/loop.c | 9 | ||||
-rw-r--r-- | iconv/skeleton.c | 43 |
7 files changed, 296 insertions, 92 deletions
diff --git a/iconv/gconv.h b/iconv/gconv.h index 804d8daca2..1821844952 100644 --- a/iconv/gconv.h +++ b/iconv/gconv.h @@ -50,6 +50,14 @@ enum }; +/* Flags the `__gconv_open' function can set. */ +enum +{ + __GCONV_IS_LAST = 0x0001, + __GCONV_IGNORE_ERRORS = 0x0002 +}; + + /* Forward declarations. */ struct __gconv_step; struct __gconv_step_data; @@ -103,7 +111,7 @@ struct __gconv_step_data buffer. */ /* Is this the last module in the chain. */ - int __is_last; + int __flags; /* Counter for number of invocations of the module function for this descriptor. */ diff --git a/iconv/gconv_close.c b/iconv/gconv_close.c index a22123b1c9..79dcb0b3a1 100644 --- a/iconv/gconv_close.c +++ b/iconv/gconv_close.c @@ -1,5 +1,5 @@ /* Release any resource associated with given conversion descriptor. - Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. @@ -38,10 +38,10 @@ __gconv_close (__gconv_t cd) drunp = cd->__data; do { - if (!drunp->__is_last && drunp->__outbuf != NULL) + if (!(drunp->__flags & __GCONV_IS_LAST) && drunp->__outbuf != NULL) free (drunp->__outbuf); } - while (!(drunp++)->__is_last); + while (!((drunp++)->__flags & __GCONV_IS_LAST)); /* Free the data allocated for the descriptor. */ free (cd); diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c index 147f5b0e06..da00b1abbd 100644 --- a/iconv/gconv_open.c +++ b/iconv/gconv_open.c @@ -35,6 +35,22 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, __gconv_t result = NULL; size_t cnt = 0; int res; + int conv_flags = 0; + const char *runp; + + /* Find out whether "IGNORE" is part of the options in the `toset' + name. If yes, remove the string and remember this in the flag. */ + runp = __strchrnul (__strchrnul (toset, '/'), '/'); + if (strcmp (runp, "IGNORE") == 0) + { + /* Found it. 
This means we should ignore conversion errors. */ + char *newtoset = (char *) alloca (runp - toset + 1); + + newtoset[runp - toset] = '\0'; + toset = memcpy (newtoset, toset, runp - toset); + + flags = __GCONV_IGNORE_ERRORS; + } res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); if (res == __GCONV_OK) @@ -61,15 +77,19 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, { size_t size; + /* Would have to be done if we would not clear the whole + array above. */ /* If this is the last step we must not allocate an output buffer. */ - result->__data[cnt].__is_last = 0; + result->__data[cnt].__flags = conv_flags; +#if 0 /* Reset the counter. */ result->__data[cnt].__invocation_counter = 0; /* It's a regular use. */ result->__data[cnt].__internal_use = 0; +#endif /* We use the `mbstate_t' member in DATA. */ result->__data[cnt].__statep = &result->__data[cnt].__state; @@ -88,9 +108,13 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, } /* Now handle the last entry. */ - result->__data[cnt].__is_last = 1; + result->__data[cnt].__flags = conv_flags | __GCONV_IS_LAST; + /* Would have to be done if we would not clear the whole + array above. 
*/ +#if 0 result->__data[cnt].__invocation_counter = 0; result->__data[cnt].__internal_use = 0; +#endif result->__data[cnt].__statep = &result->__data[cnt].__state; } diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index 4010a6b326..7189ad3787 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -64,7 +64,7 @@ static const unsigned char encoding_byte[] = static inline int internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, size_t *converted) { const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; @@ -104,7 +104,8 @@ static inline int internal_ucs4_loop_unaligned (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; @@ -150,7 +151,8 @@ static inline int internal_ucs4_loop_single (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { size_t cnt = state->__count & 7; @@ -171,6 +173,8 @@ internal_ucs4_loop_single (const unsigned char **inptrp, (*outptrp)[1] = state->__value.__wchb[2]; (*outptrp)[2] = state->__value.__wchb[1]; (*outptrp)[3] = state->__value.__wchb[0]; + + *outptrp += 4; #elif __BYTE_ORDER == __BIG_ENDIAN /* XXX unaligned */ *(*((uint32_t **) outptrp)++) = state->__value.__wch; @@ -202,7 +206,7 @@ internal_ucs4_loop_single (const unsigned char **inptrp, static inline int ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + 
mbstate_t *state, int flags, void *data, size_t *converted) { const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; @@ -222,6 +226,13 @@ ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend, if (inval > 0x7fffffff) { + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*converted; + continue; + } + *inptrp = inptr; *outptrp = outptr; return __GCONV_ILLEGAL_INPUT; @@ -249,7 +260,8 @@ static inline int ucs4_internal_loop_unaligned (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; @@ -262,6 +274,13 @@ ucs4_internal_loop_unaligned (const unsigned char **inptrp, if (inptr[0] > 0x80) { /* The value is too large. */ + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*converted; + continue; + } + *inptrp = inptr; *outptrp = outptr; return __GCONV_ILLEGAL_INPUT; @@ -312,7 +331,8 @@ static inline int ucs4_internal_loop_single (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { size_t cnt = state->__count & 7; @@ -329,21 +349,28 @@ ucs4_internal_loop_single (const unsigned char **inptrp, } if (((unsigned char *) state->__value.__wchb)[0] > 0x80) - /* The value is too large. */ - return __GCONV_ILLEGAL_INPUT; - + { + /* The value is too large. 
*/ + if (!(flags & __GCONV_IGNORE_ERRORS)) + return __GCONV_ILLEGAL_INPUT; + } + else + { #if __BYTE_ORDER == __LITTLE_ENDIAN - (*outptrp)[0] = state->__value.__wchb[3]; - (*outptrp)[1] = state->__value.__wchb[2]; - (*outptrp)[2] = state->__value.__wchb[1]; - (*outptrp)[3] = state->__value.__wchb[0]; + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; #elif __BYTE_ORDER == __BIG_ENDIAN - (*outptrp)[0] = state->__value.__wchb[0]; - (*outptrp)[1] = state->__value.__wchb[1]; - (*outptrp)[2] = state->__value.__wchb[2]; - (*outptrp)[3] = state->__value.__wchb[3]; + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; #endif + *outptrp += 4; + } + /* Clear the state buffer. */ state->__count &= ~7; @@ -367,7 +394,8 @@ ucs4_internal_loop_single (const unsigned char **inptrp, static inline int internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; @@ -407,7 +435,7 @@ static inline int internal_ucs4le_loop_unaligned (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, + mbstate_t *state, int flags, void *data, size_t *converted) { const unsigned char *inptr = *inptrp; @@ -454,7 +482,8 @@ static inline int internal_ucs4le_loop_single (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { size_t cnt = state->__count & 7; @@ 
-475,6 +504,8 @@ internal_ucs4le_loop_single (const unsigned char **inptrp, (*outptrp)[1] = state->__value.__wchb[2]; (*outptrp)[2] = state->__value.__wchb[1]; (*outptrp)[3] = state->__value.__wchb[0]; + + *outptrp += 4; #else /* XXX unaligned */ *(*((uint32_t **) outptrp)++) = state->__value.__wch; @@ -503,7 +534,8 @@ internal_ucs4le_loop_single (const unsigned char **inptrp, static inline int ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; @@ -522,7 +554,16 @@ ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend, #endif if (inval > 0x7fffffff) - return __GCONV_ILLEGAL_INPUT; + { + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*converted; + continue; + } + + return __GCONV_ILLEGAL_INPUT; + } *((uint32_t *) outptr)++ = bswap_32 (*(uint32_t *) inptr); } @@ -546,7 +587,7 @@ static inline int ucs4le_internal_loop_unaligned (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, + mbstate_t *state, int flags, void *data, size_t *converted) { const unsigned char *inptr = *inptrp; @@ -560,12 +601,18 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp, if (inptr[3] > 0x80) { /* The value is too large. */ + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. 
*/ + ++*converted; + continue; + } + *inptrp = inptr; *outptrp = outptr; return __GCONV_ILLEGAL_INPUT; } - # if __BYTE_ORDER == __BIG_ENDIAN outptr[3] = inptr[0]; outptr[2] = inptr[1]; @@ -577,6 +624,8 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp, outptr[2] = inptr[2]; outptr[3] = inptr[3]; # endif + + outptr += 4; } *inptrp = inptr; @@ -599,7 +648,8 @@ static inline int ucs4le_internal_loop_single (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted) + mbstate_t *state, int flags, void *data, + size_t *converted) { size_t cnt = state->__count & 7; @@ -616,21 +666,28 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, } if (((unsigned char *) state->__value.__wchb)[3] > 0x80) - /* The value is too large. */ - return __GCONV_ILLEGAL_INPUT; - + { + /* The value is too large. */ + if (!(flags & __GCONV_IGNORE_ERRORS)) + return __GCONV_ILLEGAL_INPUT; + } + else + { #if __BYTE_ORDER == __BIG_ENDIAN - (*outptrp)[0] = state->__value.__wchb[3]; - (*outptrp)[1] = state->__value.__wchb[2]; - (*outptrp)[2] = state->__value.__wchb[1]; - (*outptrp)[3] = state->__value.__wchb[0]; + (*outptrp)[0] = state->__value.__wchb[3]; + (*outptrp)[1] = state->__value.__wchb[2]; + (*outptrp)[2] = state->__value.__wchb[1]; + (*outptrp)[3] = state->__value.__wchb[0]; #elif __BYTE_ORDER == __BIG_ENDIAN - (*outptrp)[0] = state->__value.__wchb[0]; - (*outptrp)[1] = state->__value.__wchb[1]; - (*outptrp)[2] = state->__value.__wchb[2]; - (*outptrp)[3] = state->__value.__wchb[3]; + (*outptrp)[0] = state->__value.__wchb[0]; + (*outptrp)[1] = state->__value.__wchb[1]; + (*outptrp)[2] = state->__value.__wchb[2]; + (*outptrp)[3] = state->__value.__wchb[3]; #endif + *outptrp += 4; + } + /* Clear the state buffer. 
*/ state->__count &= ~7; @@ -658,14 +715,20 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, { \ if (*inptr > '\x7f') \ { \ - /* This is no correct ANSI_X3.4-1968 character. */ \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ + if (! ignore_errors_p ()) \ + { \ + /* This is no correct ANSI_X3.4-1968 character. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ \ - /* It's an one byte sequence. */ \ - /* XXX unaligned. */ \ - *((uint32_t *) outptr)++ = *inptr++; \ + ++*converted; \ + ++inptr; \ + } \ + else \ + /* It's an one byte sequence. */ \ + /* XXX unaligned. */ \ + *((uint32_t *) outptr)++ = *inptr++; \ } #include <iconv/loop.c> #include <iconv/skeleton.c> @@ -689,13 +752,20 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, { \ if (*((uint32_t *) inptr) > 0x7f) \ { \ - /* This is no correct ANSI_X3.4-1968 character. */ \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ + if (! ignore_errors_p ()) \ + { \ + /* This is no correct ANSI_X3.4-1968 character. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ \ - /* It's an one byte sequence. */ \ - *outptr++ = *((uint32_t *) inptr)++; \ + ++*converted; \ + inptr += 4; \ + } \ + else \ + /* It's an one byte sequence. */ \ + /* XXX unaligned. */ \ + *outptr++ = *((uint32_t *) inptr)++; \ } #include <iconv/loop.c> #include <iconv/skeleton.c> @@ -829,9 +899,26 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, } \ else \ { \ - /* This is an illegal encoding. */ \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ + int skipped; \ + \ + if (! ignore_errors_p ()) \ + { \ + /* This is an illegal encoding. */ \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + /* Search the end of this ill-formed UTF-8 character. This \ + is the next byte with (x & 0xc0) != 0x80. 
*/ \ + skipped = 0; \ + do \ + { \ + ++inptr; \ + ++skipped; \ + } \ + while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \ + \ + continue; \ } \ \ if (NEED_LENGTH_TEST && inptr + cnt > inend) \ @@ -841,8 +928,23 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, for (i = 1; inptr + i < inend; ++i) \ if ((inptr[i] & 0xc0) != 0x80) \ break; \ - result = (inptr + i == inend \ - ? __GCONV_INCOMPLETE_INPUT : __GCONV_ILLEGAL_INPUT); \ + \ + if (inptr + i == inend) \ + { \ + result = __GCONV_INCOMPLETE_INPUT; \ + break; \ + } \ + \ + /* This is an illegal character. */ \ + if (ignore_errors_p ()) \ + { \ + /* Ignore it. */ \ + inptr += i; \ + ++*converted; \ + continue; \ + } \ + \ + result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ \ @@ -858,13 +960,20 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, ch <<= 6; \ ch |= byte & 0x3f; \ } \ - \ + \ /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ have been represented with fewer than cnt bytes. */ \ - if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \ + if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \ { \ /* This is an illegal encoding. */ \ + if (ignore_errors_p ()) \ + { \ + inptr += i; \ + ++*converted; \ + continue; \ + } \ + \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ @@ -1021,10 +1130,17 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, { \ if (*((uint32_t *) inptr) >= 0x10000) \ { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ + if (! 
ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 4; \ + ++*converted; \ } \ - *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \ + else \ + *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \ } #include <iconv/loop.c> #include <iconv/skeleton.c> @@ -1070,8 +1186,14 @@ ucs4le_internal_loop_single (const unsigned char **inptrp, uint32_t val = *((uint32_t *) inptr); \ if (val >= 0x10000) \ { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + \ + inptr += 4; \ + ++*converted; \ } \ *((uint16_t *) outptr)++ = bswap_16 (val); \ inptr += 4; \ diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 1ac23e019c..56a2014268 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -97,6 +97,9 @@ static int verbose; /* Nonzero if list of all coded character sets is wanted. */ static int list; +/* If nonzero omit invalid character from output. */ +static int omit_invalid; + /* Prototypes for the functions doing the actual work. */ static int process_block (iconv_t cd, char *addr, size_t len, FILE *output); static int process_fd (iconv_t cd, int fd, FILE *output); @@ -111,6 +114,7 @@ main (int argc, char *argv[]) int remaining; FILE *output; iconv_t cd; + const char *orig_to_code; /* Set locale via LC_ALL. */ setlocale (LC_ALL, ""); @@ -138,15 +142,48 @@ main (int argc, char *argv[]) if (to_code == NULL) error (EXIT_FAILURE, 0, _("target encoding not specified using `-t'")); + /* If we have to ignore errors make sure we use the appropriate name for + the to-character-set. 
*/ + orig_to_code = to_code; + if (omit_invalid) + { + const char *errhand = strchrnul (to_code, '/'); + int nslash = 2; + char *newp; + char *cp; + + if (*errhand == '/') + { + --nslash; + errhand = strchrnul (errhand, '/'); + + if (*errhand == '/') + { + --nslash; + ++errhand; + } + } + + newp = (char *) alloca (errhand - to_code + nslash + 6 + 1); + cp = mempcpy (newp, to_code, errhand - to_code); + while (nslash > 0) + *cp++ = '/'; + memcpy (cp, "NEEDED", sizeof ("NEEDED")); + + to_code = newp; + } + /* Let's see whether we have these coded character sets. */ cd = iconv_open (to_code, from_code); if (cd == (iconv_t) -1) { if (errno == EINVAL) - error (EXIT_FAILURE, 0, _("conversion from `%s' to `%s' not supported"), - from_code, to_code); + error (EXIT_FAILURE, 0, + _("conversion from `%s' to `%s' not supported"), + from_code, orig_to_code); else - error (EXIT_FAILURE, errno, _("failed to start conversion processing")); + error (EXIT_FAILURE, errno, + _("failed to start conversion processing")); } /* Determine output file. */ @@ -274,9 +311,8 @@ parse_opt (int key, char *arg, struct argp_state *state) about missing character or so. */ break; case 'c': - /* Omit invalid characters from output. - XXX This option will become a meaning once we have different - modes of operation for the conversion functions. */ + /* Omit invalid characters from output. */ + omit_invalid = 1; break; case OPT_VERBOSE: verbose = 1; diff --git a/iconv/loop.c b/iconv/loop.c index 9c5dbfca77..9e8e32e8cc 100644 --- a/iconv/loop.c +++ b/iconv/loop.c @@ -168,11 +168,16 @@ #endif +/* To make it easier for the writers of the modules, we define a macro + to test whether we have to ignore errors. */ +#define ignore_errors_p() (flags & __GCONV_IGNORE_ERRORS) + + /* The function returns the status, as defined in gconv.h. 
*/ static inline int FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted + mbstate_t *state, int flags, void *data, size_t *converted EXTRA_LOOP_DECLS) { int result = __GCONV_OK; @@ -285,7 +290,7 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, static inline int SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, - mbstate_t *state, void *data, size_t *converted + mbstate_t *state, int flags, void *data, size_t *converted EXTRA_LOOP_DECLS) { int result = __GCONV_OK; diff --git a/iconv/skeleton.c b/iconv/skeleton.c index 81677cc7d9..6069104f52 100644 --- a/iconv/skeleton.c +++ b/iconv/skeleton.c @@ -275,9 +275,11 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, { struct __gconv_step *next_step = step + 1; struct __gconv_step_data *next_data = data + 1; - __gconv_fct fct = data->__is_last ? NULL : next_step->__fct; + __gconv_fct fct; int status; + fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct; + /* If the function is called with no input this means we have to reset to the initial state. The possibly partly converted input is dropped. */ @@ -291,7 +293,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, #endif /* Call the steps down the chain if there are any but only if we successfully emitted the escape sequence. */ - if (status == __GCONV_OK && ! data->__is_last) + if (status == __GCONV_OK && ! 
(data->__flags & __GCONV_IS_LAST)) status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, written, 1, consume_incomplete)); } @@ -332,16 +334,18 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, # if MAX_NEEDED_FROM > 1 if (MAX_NEEDED_TO == 1 || FROM_DIRECTION) status = SINGLE(FROM_LOOP) (inptrp, inend, &outbuf, outend, - data->__statep, step->__data, - &converted EXTRA_LOOP_ARGS); + data->__statep, data->__flags, + step->__data, &converted + EXTRA_LOOP_ARGS); # endif # if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION else # endif # if MAX_NEEDED_TO > 1 && !ONE_DIRECTION status = SINGLE(TO_LOOP) (inptrp, inend, &outbuf, outend, - data->__statep, step->__data, - &converted EXTRA_LOOP_ARGS); + data->__statep, data->__flags, + step->__data, &converted + EXTRA_LOOP_ARGS); # endif if (status != __GCONV_OK) @@ -359,10 +363,10 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, for all known and supported encodings. */ unaligned = ((FROM_DIRECTION && ((uintptr_t) inptr % MIN_NEEDED_FROM != 0 - || (data->__is_last + || ((data->__flags & __GCONV_IS_LAST) && (uintptr_t) outbuf % MIN_NEEDED_TO != 0))) || (!FROM_DIRECTION - && ((data->__is_last + && (((data->__flags & __GCONV_IS_LAST) && (uintptr_t) outbuf % MIN_NEEDED_FROM != 0) || (uintptr_t) inptr % MIN_NEEDED_TO != 0))); #endif @@ -383,13 +387,13 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, if (FROM_DIRECTION) /* Run the conversion loop. */ status = FROM_LOOP (inptrp, inend, &outbuf, outend, - data->__statep, step->__data, &converted - EXTRA_LOOP_ARGS); + data->__statep, data->__flags, + step->__data, &converted EXTRA_LOOP_ARGS); else /* Run the conversion loop. 
*/ status = TO_LOOP (inptrp, inend, &outbuf, outend, - data->__statep, step->__data, &converted - EXTRA_LOOP_ARGS); + data->__statep, data->__flags, + step->__data, &converted EXTRA_LOOP_ARGS); } #if !defined _STRING_ARCH_unaligned \ && MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \ @@ -400,12 +404,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, /* Run the conversion loop. */ status = GEN_unaligned (FROM_LOOP) (inptrp, inend, &outbuf, outend, data->__statep, + data->__flags, step->__data, &converted EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ status = GEN_unaligned (TO_LOOP) (inptrp, inend, &outbuf, outend, data->__statep, + data->__flags, step->__data, &converted EXTRA_LOOP_ARGS); } @@ -416,7 +422,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, /* If this is the last step leave the loop, there is nothing we can do. */ - if (data->__is_last) + if (data->__flags & __GCONV_IS_LAST) { /* Store information about how many bytes are available. */ data->__outbuf = outbuf; @@ -457,22 +463,25 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, SAVE_RESET_STATE (0); # endif + /* XXX Handle unaligned access here as well. */ if (FROM_DIRECTION) /* Run the conversion loop. */ nstatus = FROM_LOOP ((const unsigned char **) inptrp, (const unsigned char *) inend, (unsigned char **) &outbuf, (unsigned char *) outerr, - data->__statep, step->__data, - &converted EXTRA_LOOP_ARGS); + data->__statep, data->__flags, + step->__data, &converted + EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ nstatus = TO_LOOP ((const unsigned char **) inptrp, (const unsigned char *) inend, (unsigned char **) &outbuf, (unsigned char *) outerr, - data->__statep, step->__data, - &converted EXTRA_LOOP_ARGS); + data->__statep, data->__flags, + step->__data, &converted + EXTRA_LOOP_ARGS); /* We must run out of output buffer space in this rerun. */ |