about summary refs log tree commit diff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r-- iconv/Makefile 2
-rw-r--r-- iconv/gconv.c 8
-rw-r--r-- iconv/gconv.h 39
-rw-r--r-- iconv/gconv_int.h 48
-rw-r--r-- iconv/gconv_open.c 102
-rw-r--r-- iconv/gconv_simple.c 177
-rw-r--r-- iconv/gconv_trans.c 50
-rw-r--r-- iconv/loop.c 129
-rw-r--r-- iconv/skeleton.c 60
9 files changed, 427 insertions, 188 deletions
diff --git a/iconv/Makefile b/iconv/Makefile
index 14076e6f6e..6af661c964 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -26,7 +26,7 @@ include ../Makeconfig
headers = iconv.h gconv.h
routines = iconv_open iconv iconv_close \
gconv_open gconv gconv_close gconv_db gconv_conf \
- gconv_builtin gconv_simple
+ gconv_builtin gconv_simple gconv_trans
ifeq ($(elf),yes)
routines += gconv_dl
else
diff --git a/iconv/gconv.c b/iconv/gconv.c
index 06e212b2cb..19f95622a2 100644
--- a/iconv/gconv.c
+++ b/iconv/gconv.c
@@ -46,8 +46,8 @@ __gconv (__gconv_t cd, const unsigned char **inbuf,
if (inbuf == NULL || *inbuf == NULL)
/* We just flush. */
result = DL_CALL_FCT (cd->__steps->__fct,
- (cd->__steps, cd->__data, NULL, NULL,
- irreversible, 1, 0));
+ (cd->__steps, cd->__data, NULL, NULL,
+ cd->__data[0].__outbuf, irreversible, 1, 0));
else
{
const unsigned char *last_start;
@@ -58,8 +58,8 @@ __gconv (__gconv_t cd, const unsigned char **inbuf,
{
last_start = *inbuf;
result = DL_CALL_FCT (cd->__steps->__fct,
- (cd->__steps, cd->__data, inbuf, inbufend,
- irreversible, 0, 0));
+ (cd->__steps, cd->__data, inbuf, inbufend,
+ cd->__data[0].__outbuf, irreversible, 0, 0));
}
while (result == __GCONV_EMPTY_INPUT && last_start != *inbuf
&& *inbuf + cd->__steps->__min_needed_from <= inbufend);
diff --git a/iconv/gconv.h b/iconv/gconv.h
index 1821844952..5717ddbea0 100644
--- a/iconv/gconv.h
+++ b/iconv/gconv.h
@@ -62,18 +62,52 @@ enum
struct __gconv_step;
struct __gconv_step_data;
struct __gconv_loaded_object;
+struct __gconv_trans_data;
/* Type of a conversion function. */
typedef int (*__gconv_fct) (struct __gconv_step *, struct __gconv_step_data *,
__const unsigned char **, __const unsigned char *,
- size_t *, int, int);
+ unsigned char *, size_t *, int, int);
/* Constructor and destructor for local data for conversion step. */
typedef int (*__gconv_init_fct) (struct __gconv_step *);
typedef void (*__gconv_end_fct) (struct __gconv_step *);
+/* Type of a transliteration/transcription function. */
+typedef int (*__gconv_trans_fct) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ __const unsigned char *,
+ __const unsigned char **,
+ __const unsigned char *, unsigned char *,
+ unsigned char **, unsigned char *, size_t *);
+
+/* Function to call to provide transliteration module with context. */
+typedef int (*__gconv_trans_context_fct) (struct __gconv_trans_data *data,
+ __const unsigned char *,
+ __const unsigned char *,
+ __const unsigned char *,
+ unsigned char *, unsigned char *,
+ unsigned char *);
+
+/* Function to query module about supported encoded character sets. */
+typedef int (*__gconv_trans_query_fct) (__const char **, size_t *);
+
+/* Constructor and destructor for local data for transliteration. */
+typedef int (*__gconv_trans_init_fct) (void **, const char *);
+typedef void (*__gconv_trans_end_fct) (void *);
+
+struct __gconv_trans_data
+{
+ /* Transliteration/transcription function. */
+ __gconv_trans_fct __trans_fct;
+ __gconv_trans_context_fct __trans_context_fct;
+ __gconv_trans_end_fct __trans_end_fct;
+ void *__data;
+};
+
+
/* Description of a conversion step. */
struct __gconv_step
{
@@ -124,6 +158,9 @@ struct __gconv_step_data
__mbstate_t *__statep;
__mbstate_t __state; /* This element must not be used directly by
any module; always use STATEP! */
+
+ /* Transliteration information. */
+ struct __gconv_trans_data __trans;
};
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 01cebe72c4..87287d7b26 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -129,8 +129,8 @@ extern struct gconv_module *__gconv_modules_db;
/* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */
-extern int __gconv_open (const char *__toset, const char *__fromset,
- __gconv_t *__handle, int flags)
+extern int __gconv_open (const char *toset, const char *fromset,
+ __gconv_t *handle, int flags)
internal_function;
/* Free resources associated with transformation descriptor CD. */
@@ -141,55 +141,65 @@ extern int __gconv_close (__gconv_t cd)
according to rules described by CD and place up to *OUTBYTESLEFT
bytes in buffer starting at *OUTBUF. Return number of non-identical
conversions in *IRREVERSIBLE if this pointer is not null. */
-extern int __gconv (__gconv_t __cd, const unsigned char **__inbuf,
- const unsigned char *inbufend, unsigned char **__outbuf,
+extern int __gconv (__gconv_t cd, const unsigned char **inbuf,
+ const unsigned char *inbufend, unsigned char **outbuf,
unsigned char *outbufend, size_t *irreversible)
internal_function;
/* Return in *HANDLE a pointer to an array with *NSTEPS elements describing
the single steps necessary for transformation from FROMSET to TOSET. */
-extern int __gconv_find_transform (const char *__toset, const char *__fromset,
- struct __gconv_step **__handle,
- size_t *__nsteps, int flags)
+extern int __gconv_find_transform (const char *toset, const char *fromset,
+ struct __gconv_step **handle,
+ size_t *nsteps, int flags)
internal_function;
/* Read all the configuration data and cache it. */
extern void __gconv_read_conf (void);
/* Comparison function to search alias. */
-extern int __gconv_alias_compare (const void *__p1, const void *__p2);
+extern int __gconv_alias_compare (const void *p1, const void *p2);
/* Clear reference to transformation step implementations which might
cause the code to be unloaded. */
-extern int __gconv_close_transform (struct __gconv_step *__steps,
- size_t __nsteps)
+extern int __gconv_close_transform (struct __gconv_step *steps,
+ size_t nsteps)
internal_function;
/* Load shared object named by NAME. If already loaded increment reference
count. */
-extern struct __gconv_loaded_object *__gconv_find_shlib (const char *__name)
+extern struct __gconv_loaded_object *__gconv_find_shlib (const char *name)
internal_function;
/* Release shared object. If no further reference is available unload
the object. */
-extern int __gconv_release_shlib (struct __gconv_loaded_object *__handle)
+extern int __gconv_release_shlib (struct __gconv_loaded_object *handle)
internal_function;
/* Fill STEP with information about builtin module with NAME. */
-extern void __gconv_get_builtin_trans (const char *__name,
- struct __gconv_step *__step)
+extern void __gconv_get_builtin_trans (const char *name,
+ struct __gconv_step *step)
internal_function;
+/* Transliteration using the locale's data. */
+extern int gconv_transliterate (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ __const unsigned char *inbufstart,
+ __const unsigned char **inbufp,
+ __const unsigned char *inbufend,
+ unsigned char *outbufstart,
+ unsigned char **outbufp,
+ unsigned char *outbufend,
+ size_t *irreversible);
/* Builtin transformations. */
#ifdef _LIBC
# define __BUILTIN_TRANS(Name) \
- extern int Name (struct __gconv_step *__step, \
- struct __gconv_step_data *__data, \
- const unsigned char **__inbuf, \
- const unsigned char *__inbufend, size_t *__written, \
- int __do_flush, int __consume_incomplete)
+ extern int Name (struct __gconv_step *step, \
+ struct __gconv_step_data *data, \
+ const unsigned char **inbuf, \
+ const unsigned char *inbufend, unsigned char *outbufstart, \
+ size_t *irreversible, int do_flush, int consume_incomplete)
__BUILTIN_TRANS (__gconv_transform_ascii_internal);
__BUILTIN_TRANS (__gconv_transform_internal_ascii);
diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c
index da00b1abbd..984ca9dc5d 100644
--- a/iconv/gconv_open.c
+++ b/iconv/gconv_open.c
@@ -36,25 +36,65 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
size_t cnt = 0;
int res;
int conv_flags = 0;
- const char *runp;
+ const char *errhand;
- /* Find out whether "IGNORE" is part of the options in the `toset'
- name. If yes, remove the string and remember this in the flag. */
- runp = __strchrnul (__strchrnul (toset, '/'), '/');
- if (strcmp (runp, "IGNORE") == 0)
+ /* Find out whether any error handling method is specified. */
+ errhand = strchr (toset, '/');
+ if (errhand != NULL)
+ errhand = strchr (errhand + 1, '/');
+ if (__builtin_expect (errhand != NULL, 1))
{
- /* Found it. This means we should ignore conversion errors. */
- char *newtoset = (char *) alloca (runp - toset + 1);
+ if (errhand[1] == '\0')
+ errhand = NULL;
+ else
+ {
+ /* Make copy without the error handling description. */
+ char *newtoset = (char *) alloca (errhand - toset + 1);
- newtoset[runp - toset] = '\0';
- toset = memcpy (newtoset, toset, runp - toset);
+ newtoset[errhand - toset] = '\0';
+ toset = memcpy (newtoset, toset, errhand - toset);
- flags = __GCONV_IGNORE_ERRORS;
+ flags = __GCONV_IGNORE_ERRORS;
+
+ if (strcasecmp (errhand, "IGNORE") == 0)
+ {
+ /* Found it. This means we should ignore conversion errors. */
+ flags = __GCONV_IGNORE_ERRORS;
+ errhand = NULL;
+ }
+ }
}
res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags);
if (res == __GCONV_OK)
{
+ const char **csnames = NULL;
+ size_t ncsnames = 0;
+ __gconv_trans_fct trans_fct = NULL;
+ __gconv_trans_context_fct trans_context_fct = NULL;
+ __gconv_trans_init_fct trans_init_fct = NULL;
+ __gconv_trans_end_fct trans_end_fct = NULL;
+
+ if (errhand != NULL)
+ {
+ /* Find the appropriate transliteration handling. */
+ if (strcasecmp (errhand, "TRANSLIT") == 0)
+ {
+ /* It's the builtin transliteration handling. We only
+ support it working on the internal encoding. */
+ static const char *internal_trans_names[1] = { "INTERNAL" };
+
+ csnames = internal_trans_names;
+ ncsnames = 1;
+ trans_fct = gconv_transliterate;
+ /* No context, init, or end function. */
+ }
+ else if (strcasecmp (errhand, "WORK AROUND A GCC BUG") == 0)
+ {
+ trans_init_fct = (__gconv_trans_init_fct) 1;
+ }
+ }
+
/* Allocate room for handle. */
result = (__gconv_t) malloc (sizeof (struct __gconv_info)
+ (nsteps
@@ -63,6 +103,8 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
res = __GCONV_NOMEM;
else
{
+ size_t n;
+
/* Remember the list of steps. */
result->__steps = steps;
result->__nsteps = nsteps;
@@ -105,6 +147,26 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
}
result->__data[cnt].__outbufend =
result->__data[cnt].__outbuf + size;
+
+ /* Now see whether we can use the transliteration module
+ for this step. */
+ for (n = 0; n < ncsnames; ++n)
+ if (strcasecmp (steps[cnt].__from_name, csnames[n]) == 0)
+ {
+ /* Match! Now try the initializer. */
+ if (trans_init_fct == NULL
+ || (trans_init_fct (&result->__data[cnt].__trans.__data,
+ steps[cnt].__to_name)
+ == __GCONV_OK))
+ {
+ result->__data[cnt].__trans.__trans_fct = trans_fct;
+ result->__data[cnt].__trans.__trans_context_fct =
+ trans_context_fct;
+ result->__data[cnt].__trans.__trans_end_fct =
+ trans_end_fct;
+ }
+ break;
+ }
}
/* Now handle the last entry. */
@@ -116,6 +178,26 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
result->__data[cnt].__internal_use = 0;
#endif
result->__data[cnt].__statep = &result->__data[cnt].__state;
+
+ /* Now see whether we can use the transliteration module
+ for this step. */
+ for (n = 0; n < ncsnames; ++n)
+ if (strcasecmp (steps[cnt].__from_name, csnames[n]) == 0)
+ {
+ /* Match! Now try the initializer. */
+ if (trans_init_fct == NULL
+ || trans_init_fct (&result->__data[cnt].__trans.__data,
+ steps[cnt].__to_name)
+ == __GCONV_OK)
+ {
+ result->__data[cnt].__trans.__trans_fct = trans_fct;
+ result->__data[cnt].__trans.__trans_context_fct =
+ trans_context_fct;
+ result->__data[cnt].__trans.__trans_end_fct =
+ trans_end_fct;
+ }
+ break;
+ }
}
if (res != __GCONV_OK)
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 4b7004caa6..188fc04c0e 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -19,6 +19,7 @@
Boston, MA 02111-1307, USA. */
#include <byteswap.h>
+#include <dlfcn.h>
#include <endian.h>
#include <errno.h>
#include <gconv.h>
@@ -62,9 +63,10 @@ static const unsigned char encoding_byte[] =
static inline int
-internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
+internal_ucs4_loop (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
const unsigned char *inptr = *inptrp;
@@ -102,10 +104,11 @@ internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
#ifndef _STRING_ARCH_unaligned
static inline int
-internal_ucs4_loop_unaligned (const unsigned char **inptrp,
+internal_ucs4_loop_unaligned (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
const unsigned char *inptr = *inptrp;
@@ -149,12 +152,14 @@ internal_ucs4_loop_unaligned (const unsigned char **inptrp,
static inline int
-internal_ucs4_loop_single (const unsigned char **inptrp,
+internal_ucs4_loop_single (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ mbstate_t *state = step_data->__statep;
size_t cnt = state->__count & 7;
while (*inptrp < inend && cnt < 4)
@@ -205,11 +210,13 @@ internal_ucs4_loop_single (const unsigned char **inptrp,
static inline int
-ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+ucs4_internal_loop (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ int flags = step_data->__flags;
const unsigned char *inptr = *inptrp;
unsigned char *outptr = *outptrp;
size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
@@ -228,6 +235,10 @@ ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
if (__builtin_expect (inval, 0) > 0x7fffffff)
{
+ /* The value is too large. We don't try transliteration here since
+ this is not an error because of the lack of possibilities to
+ represent the result. This is a genuine bug in the input since
+ UCS4 does not allow such values. */
if (flags & __GCONV_IGNORE_ERRORS)
{
/* Just ignore this character. */
@@ -259,23 +270,28 @@ ucs4_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
#ifndef _STRING_ARCH_unaligned
static inline int
-ucs4_internal_loop_unaligned (const unsigned char **inptrp,
+ucs4_internal_loop_unaligned (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ int flags = step_data->__flags;
const unsigned char *inptr = *inptrp;
unsigned char *outptr = *outptrp;
size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
int result;
size_t cnt;
- for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
+ for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
{
if (__builtin_expect (inptr[0], 0) > 0x80)
{
- /* The value is too large. */
+ /* The value is too large. We don't try transliteration here since
+ this is not an error because of the lack of possibilities to
+ represent the result. This is a genuine bug in the input since
+ UCS4 does not allow such values. */
if (flags & __GCONV_IGNORE_ERRORS)
{
/* Just ignore this character. */
@@ -299,6 +315,7 @@ ucs4_internal_loop_unaligned (const unsigned char **inptrp,
outptr[2] = inptr[2];
outptr[3] = inptr[3];
# endif
+ outptr += 4;
}
*inptrp = inptr;
@@ -318,12 +335,15 @@ ucs4_internal_loop_unaligned (const unsigned char **inptrp,
static inline int
-ucs4_internal_loop_single (const unsigned char **inptrp,
+ucs4_internal_loop_single (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ mbstate_t *state = step_data->__statep;
+ int flags = step_data->__flags;
size_t cnt = state->__count & 7;
while (*inptrp < inend && cnt < 4)
@@ -341,7 +361,10 @@ ucs4_internal_loop_single (const unsigned char **inptrp,
if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0], 0)
> 0x80)
{
- /* The value is too large. */
+ /* The value is too large. We don't try transliteration here since
+ this is not an error because of the lack of possibilities to
+ represent the result. This is a genuine bug in the input since
+ UCS4 does not allow such values. */
if (!(flags & __GCONV_IGNORE_ERRORS))
{
*inptrp -= cnt - (state->__count & 7);
@@ -386,9 +409,10 @@ ucs4_internal_loop_single (const unsigned char **inptrp,
static inline int
-internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend,
+internal_ucs4le_loop (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
const unsigned char *inptr = *inptrp;
@@ -426,10 +450,11 @@ internal_ucs4le_loop (const unsigned char **inptrp, const unsigned char *inend,
#ifndef _STRING_ARCH_unaligned
static inline int
-internal_ucs4le_loop_unaligned (const unsigned char **inptrp,
+internal_ucs4le_loop_unaligned (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
const unsigned char *inptr = *inptrp;
@@ -473,12 +498,14 @@ internal_ucs4le_loop_unaligned (const unsigned char **inptrp,
static inline int
-internal_ucs4le_loop_single (const unsigned char **inptrp,
+internal_ucs4le_loop_single (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ mbstate_t *state = step_data->__statep;
size_t cnt = state->__count & 7;
while (*inptrp < inend && cnt < 4)
@@ -526,11 +553,13 @@ internal_ucs4le_loop_single (const unsigned char **inptrp,
static inline int
-ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
+ucs4le_internal_loop (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ int flags = step_data->__flags;
const unsigned char *inptr = *inptrp;
unsigned char *outptr = *outptrp;
size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
@@ -549,6 +578,10 @@ ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
if (__builtin_expect (inval, 0) > 0x7fffffff)
{
+ /* The value is too large. We don't try transliteration here since
+ this is not an error because of the lack of possibilities to
+ represent the result. This is a genuine bug in the input since
+ UCS4 does not allow such values. */
if (flags & __GCONV_IGNORE_ERRORS)
{
/* Just ignore this character. */
@@ -578,12 +611,14 @@ ucs4le_internal_loop (const unsigned char **inptrp, const unsigned char *inend,
#ifndef _STRING_ARCH_unaligned
static inline int
-ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
+ucs4le_internal_loop_unaligned (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ int flags = step_data->__flags;
const unsigned char *inptr = *inptrp;
unsigned char *outptr = *outptrp;
size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
@@ -594,7 +629,10 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
{
if (__builtin_expect (inptr[3], 0) > 0x80)
{
- /* The value is too large. */
+ /* The value is too large. We don't try transliteration here since
+ this is not an error because of the lack of possibilities to
+ represent the result. This is a genuine bug in the input since
+ UCS4 does not allow such values. */
if (flags & __GCONV_IGNORE_ERRORS)
{
/* Just ignore this character. */
@@ -639,12 +677,15 @@ ucs4le_internal_loop_unaligned (const unsigned char **inptrp,
static inline int
-ucs4le_internal_loop_single (const unsigned char **inptrp,
+ucs4le_internal_loop_single (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp,
const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible)
{
+ mbstate_t *state = step_data->__statep;
+ int flags = step_data->__flags;
size_t cnt = state->__count & 7;
while (*inptrp < inend && cnt < 4)
@@ -662,7 +703,10 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3], 0)
> 0x80)
{
- /* The value is too large. */
+ /* The value is too large. We don't try transliteration here since
+ this is not an error because of the lack of possibilities to
+ represent the result. This is a genuine bug in the input since
+ UCS4 does not allow such values. */
if (!(flags & __GCONV_IGNORE_ERRORS))
return __GCONV_ILLEGAL_INPUT;
}
@@ -710,6 +754,10 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
{ \
if (__builtin_expect (*inptr, 0) > '\x7f') \
{ \
+ /* The value is too large. We don't try transliteration here since \
+ this is not an error because of the lack of possibilities to \
+ represent the result. This is a genuine bug in the input since \
+ ASCII does not allow such values. */ \
if (! ignore_errors_p ()) \
{ \
/* This is no correct ANSI_X3.4-1968 character. */ \
@@ -718,13 +766,14 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
} \
\
++*irreversible; \
- ++inptr; \
+ ++inptr; \
} \
else \
/* It's an one byte sequence. */ \
/* XXX unaligned. */ \
*((uint32_t *) outptr)++ = *inptr++; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
@@ -740,6 +789,13 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
#define FUNCTION_NAME __gconv_transform_internal_ascii
#define ONE_DIRECTION 1
+extern int FUNCTION_NAME (struct __gconv_step *step,
+ struct __gconv_step_data *data,
+ const unsigned char **inptrp,
+ const unsigned char *inend,
+ unsigned char *outbufstart, size_t *irreversible,
+ int do_flush, int consume_incomplete);
+
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
@@ -748,20 +804,31 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
/* XXX unaligned. */ \
if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \
{ \
- if (! ignore_errors_p ()) \
+ if (step_data->__trans.__trans_fct != NULL) \
+ { \
+ result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
+ (step, step_data, *inptrp, &inptr, inend, \
+ *outptrp, &outptr, outend, irreversible)); \
+ if (result != __GCONV_OK) \
+ break; \
+ } \
+ else if (! ignore_errors_p ()) \
{ \
/* This is no correct ANSI_X3.4-1968 character. */ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
- \
- ++*irreversible; \
- inptr += 4; \
+ else \
+ { \
+ ++*irreversible; \
+ inptr += 4; \
+ } \
} \
else \
/* It's an one byte sequence. */ \
*outptr++ = *((uint32_t *) inptr)++; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
@@ -916,7 +983,7 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
continue; \
} \
\
- if (NEED_LENGTH_TEST && __builtin_expect (inptr + cnt > inend, 0)) \
+ if (__builtin_expect (inptr + cnt > inend, 0)) \
{ \
/* We don't have enough input. But before we report that check \
that all the bytes are correct. */ \
@@ -979,6 +1046,7 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
/* Now adjust the pointers and store the result. */ \
*((uint32_t *) outptr)++ = ch; \
}
+#define LOOP_NEED_FLAGS
#define STORE_REST \
{ \
@@ -1125,18 +1193,29 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
{ \
if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \
{ \
- if (! ignore_errors_p ()) \
+ if (step_data->__trans.__trans_fct != NULL) \
+ { \
+ result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
+ (step, step_data, *inptrp, &inptr, inend, \
+ *outptrp, &outptr, outend, irreversible)); \
+ if (result != __GCONV_OK) \
+ break; \
+ } \
+ else if (! ignore_errors_p ()) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
- \
- inptr += 4; \
- ++*irreversible; \
+ else \
+ { \
+ inptr += 4; \
+ ++*irreversible; \
+ } \
} \
else \
*((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
@@ -1181,17 +1260,29 @@ ucs4le_internal_loop_single (const unsigned char **inptrp,
uint32_t val = *((uint32_t *) inptr); \
if (__builtin_expect (val, 0) >= 0x10000) \
{ \
- if (! ignore_errors_p ()) \
+ if (step_data->__trans.__trans_fct != NULL) \
+ { \
+ result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
+ (step, step_data, *inptrp, &inptr, inend, \
+ *outptrp, &outptr, outend, irreversible)); \
+ if (result != __GCONV_OK) \
+ break; \
+ } \
+ else if (! ignore_errors_p ()) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
- \
- inptr += 4; \
- ++*irreversible; \
+ else \
+ { \
+ inptr += 4; \
+ ++*irreversible; \
+ } \
+ continue; \
} \
*((uint16_t *) outptr)++ = bswap_16 (val); \
inptr += 4; \
}
+#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
new file mode 100644
index 0000000000..11c542e744
--- /dev/null
+++ b/iconv/gconv_trans.c
@@ -0,0 +1,50 @@
+/* Transliteration using the locale's data.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include <stdint.h>
+
+#include "gconv_int.h"
+#include "../locale/localeinfo.h"
+
+
+int
+gconv_transliterate (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ __const unsigned char *inbufstart,
+ __const unsigned char **inbufp,
+ __const unsigned char *inbufend,
+ unsigned char *outbufstart,
+ unsigned char **outbufp, unsigned char *outbufend,
+ size_t *irreversible)
+{
+ /* Fetch the LC_CTYPE transliteration hash size and layer count. */
+ uint_fast32_t size = _NL_CURRENT_WORD (LC_CTYPE,
+ _NL_CTYPE_TRANSLIT_HASH_SIZE);
+ uint_fast32_t layers = _NL_CURRENT_WORD (LC_CTYPE,
+ _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+
+ /* A hash size of zero means the locale carries no transliteration
+ information, so there is nothing to try: return the error. */
+ if (size == 0)
+ return __GCONV_ILLEGAL_INPUT;
+
+ /* XXX Not implemented yet (LAYERS is unused): always return the error. */
+ return __GCONV_ILLEGAL_INPUT;
+}
diff --git a/iconv/loop.c b/iconv/loop.c
index c01e52040e..ebbc1362b3 100644
--- a/iconv/loop.c
+++ b/iconv/loop.c
@@ -175,88 +175,57 @@
/* The function returns the status, as defined in gconv.h. */
static inline int
-FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
+FCTNAME (LOOPFCT) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data,
size_t *irreversible EXTRA_LOOP_DECLS)
{
- int result = __GCONV_OK;
+#ifdef LOOP_NEED_STATE
+ mbstate_t *state = step_data->__statep;
+#endif
+#ifdef LOOP_NEED_FLAGS
+ int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+ void *data = step->__data;
+#endif
+ int result = __GCONV_EMPTY_INPUT;
const unsigned char *inptr = *inptrp;
unsigned char *outptr = *outptrp;
- /* We run one loop where we avoid checks for underflow/overflow of the
- buffers to speed up the conversion a bit. */
- size_t min_in_rounds = (inend - inptr) / MAX_NEEDED_INPUT;
- size_t min_out_rounds = (outend - outptr) / MAX_NEEDED_OUTPUT;
- size_t min_rounds = MIN (min_in_rounds, min_out_rounds);
-
#ifdef INIT_PARAMS
INIT_PARAMS;
#endif
-#undef NEED_LENGTH_TEST
-#define NEED_LENGTH_TEST 0
- while (min_rounds-- > 0)
+ while (inptr != inend)
{
- /* Here comes the body the user provides. It can stop with RESULT
- set to GCONV_INCOMPLETE_INPUT (if the size of the input characters
- vary in size), GCONV_ILLEGAL_INPUT, or GCONV_FULL_OUTPUT (if the
- output characters vary in size. */
- BODY
- }
-
- if (result == __GCONV_OK)
- {
-#if MIN_NEEDED_INPUT == MAX_NEEDED_INPUT \
- && MIN_NEEDED_OUTPUT == MAX_NEEDED_OUTPUT
- /* We don't need to start another loop since we were able to determine
- the maximal number of characters to copy in advance. What remains
- to be determined is the status. */
- if (inptr == inend)
- /* No more input. */
- result = __GCONV_EMPTY_INPUT;
- else if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
- || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
- /* Overflow in the output buffer. */
- result = __GCONV_FULL_OUTPUT;
- else
- /* We have something left in the input buffer. */
- result = __GCONV_INCOMPLETE_INPUT;
-#else
- result = __GCONV_EMPTY_INPUT;
-
-# undef NEED_LENGTH_TEST
-# define NEED_LENGTH_TEST 1
- while (inptr != inend)
+ /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are made to help the
+ compiler generate better code. They will be optimized away
+ since MIN_NEEDED_OUTPUT is always a constant. */
+ if ((MIN_NEEDED_OUTPUT != 1
+ && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
+ || (MIN_NEEDED_OUTPUT == 1
+ && __builtin_expect (outptr >= outend, 0)))
+ {
+ /* Overflow in the output buffer. */
+ result = __GCONV_FULL_OUTPUT;
+ break;
+ }
+ if (MIN_NEEDED_INPUT > 1
+ && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
{
- /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
- compiler generating better code. It will optimized away
- since MIN_NEEDED_OUTPUT is always a constant. */
- if ((MIN_NEEDED_OUTPUT != 1
- && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
- || (MIN_NEEDED_OUTPUT == 1
- && __builtin_expect (outptr >= outend, 0)))
- {
- /* Overflow in the output buffer. */
- result = __GCONV_FULL_OUTPUT;
- break;
- }
- if (MIN_NEEDED_INPUT > 1
- && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
- {
- /* We don't have enough input for another complete input
- character. */
- result = __GCONV_INCOMPLETE_INPUT;
- break;
- }
-
- /* Here comes the body the user provides. It can stop with
- RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
- input characters vary in size), GCONV_ILLEGAL_INPUT, or
- GCONV_FULL_OUTPUT (if the output characters vary in size). */
- BODY
+ /* We don't have enough input for another complete input
+ character. */
+ result = __GCONV_INCOMPLETE_INPUT;
+ break;
}
-#endif /* Input and output charset are not both fixed width. */
+
+ /* Here comes the body the user provides. It can stop with
+ RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
+ input characters vary in size), GCONV_ILLEGAL_INPUT, or
+ GCONV_FULL_OUTPUT (if the output characters vary in size). */
+ BODY
}
/* Update the pointers pointed to by the parameters. */
@@ -291,11 +260,19 @@ FCTNAME (LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
# define SINGLE(fct) SINGLE2 (fct)
# define SINGLE2(fct) fct##_single
static inline int
-SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
+SINGLE(LOOPFCT) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
unsigned char **outptrp, unsigned char *outend,
- mbstate_t *state, int flags, void *data, size_t *irreversible
- EXTRA_LOOP_DECLS)
+ size_t *irreversible EXTRA_LOOP_DECLS)
{
+ mbstate_t *state = step_data->__statep;
+#ifdef LOOP_NEED_FLAGS
+ int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+ void *data = step->__data;
+#endif
int result = __GCONV_OK;
unsigned char bytebuf[MAX_NEEDED_INPUT];
const unsigned char *inptr = *inptrp;
@@ -347,8 +324,7 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
inptr = bytebuf;
inend = &bytebuf[inlen];
-#undef NEED_LENGTH_TEST
-#define NEED_LENGTH_TEST 1
+
do
{
BODY
@@ -410,9 +386,12 @@ SINGLE(LOOPFCT) (const unsigned char **inptrp, const unsigned char *inend,
#undef EXTRA_LOOP_DECLS
#undef INIT_PARAMS
#undef UPDATE_PARAMS
+#undef UNPACK_BYTES
+#undef LOOP_NEED_STATE
+#undef LOOP_NEED_FLAGS
+#undef LOOP_NEED_DATA
#undef get16
#undef get32
#undef put16
#undef put32
#undef unaligned
-#undef UNPACK_BYTES
diff --git a/iconv/skeleton.c b/iconv/skeleton.c
index 9b7b4a1125..dca2c7f7a6 100644
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@@ -271,7 +271,8 @@ gconv_init (struct __gconv_step *step)
int
FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
const unsigned char **inptrp, const unsigned char *inend,
- size_t *irreversible, int do_flush, int consume_incomplete)
+ unsigned char *outbufstart, size_t *irreversible, int do_flush,
+ int consume_incomplete)
{
struct __gconv_step *next_step = step + 1;
struct __gconv_step_data *next_data = data + 1;
@@ -295,13 +296,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
successfully emitted the escape sequence. */
if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST))
status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
- irreversible, 1, consume_incomplete));
+ next_data->__outbuf, irreversible, 1,
+ consume_incomplete));
}
else
{
/* We preserve the initial values of the pointer variables. */
const unsigned char *inptr = *inptrp;
- unsigned char *outbuf = data->__outbuf;
+ unsigned char *outbuf = outbufstart;
unsigned char *outend = data->__outbufend;
unsigned char *outstart;
/* This variable is used to count the number of characters we
@@ -333,19 +335,16 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
# if MAX_NEEDED_FROM > 1
if (MAX_NEEDED_TO == 1 || FROM_DIRECTION)
- status = SINGLE(FROM_LOOP) (inptrp, inend, &outbuf, outend,
- data->__statep, data->__flags,
- step->__data, &lirreversible
+ status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf,
+ outend, &lirreversible
EXTRA_LOOP_ARGS);
# endif
# if MAX_NEEDED_FROM > 1 && MAX_NEEDED_TO > 1 && !ONE_DIRECTION
else
# endif
# if MAX_NEEDED_TO > 1 && !ONE_DIRECTION
- status = SINGLE(TO_LOOP) (inptrp, inend, &outbuf, outend,
- data->__statep, data->__flags,
- step->__data, &lirreversible
- EXTRA_LOOP_ARGS);
+ status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf,
+ outend, &lirreversible EXTRA_LOOP_ARGS);
# endif
if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
@@ -386,16 +385,12 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
{
if (FROM_DIRECTION)
/* Run the conversion loop. */
- status = FROM_LOOP (inptrp, inend, &outbuf, outend,
- data->__statep, data->__flags,
- step->__data, &lirreversible
- EXTRA_LOOP_ARGS);
+ status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend,
+ &lirreversible EXTRA_LOOP_ARGS);
else
/* Run the conversion loop. */
- status = TO_LOOP (inptrp, inend, &outbuf, outend,
- data->__statep, data->__flags,
- step->__data, &lirreversible
- EXTRA_LOOP_ARGS);
+ status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend,
+ &lirreversible EXTRA_LOOP_ARGS);
}
#if !defined _STRING_ARCH_unaligned \
&& MIN_NEEDED_FROM != 1 && MAX_NEEDED_FROM % MIN_NEEDED_FROM == 0 \
@@ -404,18 +399,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
{
if (FROM_DIRECTION)
/* Run the conversion loop. */
- status = GEN_unaligned (FROM_LOOP) (inptrp, inend, &outbuf,
- outend, data->__statep,
- data->__flags,
- step->__data,
+ status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend,
+ &outbuf, outend,
&lirreversible
EXTRA_LOOP_ARGS);
else
/* Run the conversion loop. */
- status = GEN_unaligned (TO_LOOP) (inptrp, inend, &outbuf,
- outend, data->__statep,
- data->__flags,
- step->__data,
+ status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend,
+ &outbuf, outend,
&lirreversible
EXTRA_LOOP_ARGS);
}
@@ -445,7 +436,8 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
int result;
result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
- outbuf, irreversible, 0,
+ outbuf, next_data->__outbuf,
+ irreversible, 0,
consume_incomplete));
if (result != __GCONV_EMPTY_INPUT)
@@ -471,22 +463,20 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
/* XXX Handle unaligned access here as well. */
if (FROM_DIRECTION)
/* Run the conversion loop. */
- nstatus = FROM_LOOP ((const unsigned char **) inptrp,
+ nstatus = FROM_LOOP (step, data,
+ (const unsigned char **) inptrp,
(const unsigned char *) inend,
(unsigned char **) &outbuf,
(unsigned char *) outerr,
- data->__statep, data->__flags,
- step->__data, &lirreversible
- EXTRA_LOOP_ARGS);
+ &lirreversible EXTRA_LOOP_ARGS);
else
/* Run the conversion loop. */
- nstatus = TO_LOOP ((const unsigned char **) inptrp,
+ nstatus = TO_LOOP (step, data,
+ (const unsigned char **) inptrp,
(const unsigned char *) inend,
(unsigned char **) &outbuf,
(unsigned char *) outerr,
- data->__statep, data->__flags,
- step->__data, &lirreversible
- EXTRA_LOOP_ARGS);
+ &lirreversible EXTRA_LOOP_ARGS);
/* We must run out of output buffer space in this
rerun. */