aboutsummaryrefslogtreecommitdiff
path: root/iconv
diff options
context:
space:
mode:
Diffstat (limited to 'iconv')
-rw-r--r--iconv/gconv.c6
-rw-r--r--iconv/gconv.h6
-rw-r--r--iconv/gconv_int.h21
-rw-r--r--iconv/gconv_open.c2
-rw-r--r--iconv/gconv_simple.c6
-rw-r--r--iconv/gconv_trans.c122
-rw-r--r--iconv/skeleton.c26
7 files changed, 149 insertions, 40 deletions
diff --git a/iconv/gconv.c b/iconv/gconv.c
index 19f95622a2..0b7b917b6a 100644
--- a/iconv/gconv.c
+++ b/iconv/gconv.c
@@ -46,8 +46,8 @@ __gconv (__gconv_t cd, const unsigned char **inbuf,
if (inbuf == NULL || *inbuf == NULL)
/* We just flush. */
result = DL_CALL_FCT (cd->__steps->__fct,
- (cd->__steps, cd->__data, NULL, NULL,
- cd->__data[0].__outbuf, irreversible, 1, 0));
+ (cd->__steps, cd->__data, NULL, NULL, NULL,
+ irreversible, 1, 0));
else
{
const unsigned char *last_start;
@@ -59,7 +59,7 @@ __gconv (__gconv_t cd, const unsigned char **inbuf,
last_start = *inbuf;
result = DL_CALL_FCT (cd->__steps->__fct,
(cd->__steps, cd->__data, inbuf, inbufend,
- cd->__data[0].__outbuf, irreversible, 0, 0));
+ NULL, irreversible, 0, 0));
}
while (result == __GCONV_EMPTY_INPUT && last_start != *inbuf
&& *inbuf + cd->__steps->__min_needed_from <= inbufend);
diff --git a/iconv/gconv.h b/iconv/gconv.h
index 2bba133d0c..0164b62c8b 100644
--- a/iconv/gconv.h
+++ b/iconv/gconv.h
@@ -68,7 +68,7 @@ struct __gconv_trans_data;
/* Type of a conversion function. */
typedef int (*__gconv_fct) (struct __gconv_step *, struct __gconv_step_data *,
__const unsigned char **, __const unsigned char *,
- unsigned char *, size_t *, int, int);
+ unsigned char **, size_t *, int, int);
/* Constructor and destructor for local data for conversion step. */
typedef int (*__gconv_init_fct) (struct __gconv_step *);
@@ -80,8 +80,8 @@ typedef int (*__gconv_trans_fct) (struct __gconv_step *step,
struct __gconv_step_data *step_data,
__const unsigned char *,
__const unsigned char **,
- __const unsigned char *, unsigned char *,
- unsigned char **, unsigned char *, size_t *);
+ __const unsigned char *, unsigned char **,
+ size_t *);
/* Function to call to provide transliteration module with context. */
typedef int (*__gconv_trans_context_fct) (struct __gconv_trans_data *data,
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 87287d7b26..34dff7d522 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -181,15 +181,13 @@ extern void __gconv_get_builtin_trans (const char *name,
internal_function;
/* Transliteration using the locale's data. */
-extern int gconv_transliterate (struct __gconv_step *step,
- struct __gconv_step_data *step_data,
- __const unsigned char *inbufstart,
- __const unsigned char **inbufp,
- __const unsigned char *inbufend,
- unsigned char *outbufstart,
- unsigned char **outbufp,
- unsigned char *outbufend,
- size_t *irreversible);
+extern int __gconv_transliterate (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ __const unsigned char *inbufstart,
+ __const unsigned char **inbufp,
+ __const unsigned char *inbufend,
+ unsigned char **outbufstart,
+ size_t *irreversible);
/* Builtin transformations. */
@@ -198,8 +196,9 @@ extern int gconv_transliterate (struct __gconv_step *step,
extern int Name (struct __gconv_step *step, \
struct __gconv_step_data *data, \
const unsigned char **inbuf, \
- const unsigned char *inbufend, unsigned char *outbufstart, \
- size_t *irreversible, int do_flush, int consume_incomplete)
+ const unsigned char *inbufend, \
+ unsigned char **outbufstart, size_t *irreversible, \
+ int do_flush, int consume_incomplete)
__BUILTIN_TRANS (__gconv_transform_ascii_internal);
__BUILTIN_TRANS (__gconv_transform_internal_ascii);
diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c
index 984ca9dc5d..14f1d5e0f9 100644
--- a/iconv/gconv_open.c
+++ b/iconv/gconv_open.c
@@ -86,7 +86,7 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
csnames = internal_trans_names;
ncsnames = 1;
- trans_fct = gconv_transliterate;
+ trans_fct = __gconv_transliterate;
/* No context, init, or end function. */
}
else if (strcasecmp (errhand, "WORK AROUND A GCC BUG") == 0)
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 5033f3ef83..390574582f 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -801,7 +801,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
{ \
result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
(step, step_data, *inptrp, &inptr, inend, \
- *outptrp, &outptr, outend, irreversible)); \
+ &outptr, irreversible)); \
if (result != __GCONV_OK) \
break; \
} \
@@ -1190,7 +1190,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
{ \
result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
(step, step_data, *inptrp, &inptr, inend, \
- *outptrp, &outptr, outend, irreversible)); \
+ &outptr, irreversible)); \
if (result != __GCONV_OK) \
break; \
} \
@@ -1257,7 +1257,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
{ \
result = DL_CALL_FCT (step_data->__trans.__trans_fct, \
(step, step_data, *inptrp, &inptr, inend, \
- *outptrp, &outptr, outend, irreversible)); \
+ &outptr, irreversible)); \
if (result != __GCONV_OK) \
break; \
} \
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 56c3ff6be6..829ff5f981 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -18,6 +18,7 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <dlfcn.h>
#include <stdint.h>
#include "gconv_int.h"
@@ -25,26 +26,121 @@
+/* Try to transliterate the input at *INBUFP using the transliteration
+ table of the current LC_CTYPE locale.
+
+ The table is searched for an entry whose source string matches the
+ 4-byte values at *INBUFP. Each replacement string of a matching entry
+ is passed to STEP's conversion function, writing through OUTBUFSTART,
+ until one is not rejected with __GCONV_ILLEGAL_INPUT.
+
+ Returns __GCONV_ILLEGAL_INPUT if the locale has no table or nothing
+ matched, __GCONV_EMPTY_INPUT or __GCONV_INCOMPLETE_INPUT if less than
+ one 4-byte value is available, and otherwise the conversion function's
+ result. When that result is __GCONV_EMPTY_INPUT, *INBUFP is advanced
+ past the matched sequence and *IRREVERSIBLE is incremented.
+ INBUFSTART is not used here. */
int
-gconv_transliterate (struct __gconv_step *step,
- struct __gconv_step_data *step_data,
- const unsigned char *inbufstart,
- const unsigned char **inbufp,
- const unsigned char *inbufend,
- unsigned char *outbufstart,
- unsigned char **outbufp, unsigned char *outbufend,
- size_t *irreversible)
+__gconv_transliterate (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char *inbufstart,
+ const unsigned char **inbufp,
+ const unsigned char *inbufend,
+ unsigned char **outbufstart, size_t *irreversible)
{
/* Find out about the locale's transliteration. */
- uint_fast32_t size = _NL_CURRENT_WORD (LC_CTYPE,
- _NL_CTYPE_TRANSLIT_HASH_SIZE);
- uint_fast32_t layers = _NL_CURRENT_WORD (LC_CTYPE,
- _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+ uint_fast32_t size;
+ uint_fast32_t layers;
+ uint32_t *from_idx;
+ uint32_t *from_tbl;
+ uint32_t *to_idx;
+ uint32_t *to_tbl;
+ uint32_t *winbuf;
+ uint32_t *winbufend;
+ uint_fast32_t low;
+ uint_fast32_t high;
/* If there is no transliteration information in the locale don't do
anything and return the error. */
+ size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
if (size == 0)
return __GCONV_ILLEGAL_INPUT;
- /* XXX For now we don't do anything. */
+ /* Get the rest of the values. */
+ layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+ from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
+ from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
+ to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
+ to_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
+
+ /* The input buffer. These are actually 4-byte values. */
+ winbuf = (uint32_t *) *inbufp;
+ winbufend = (uint32_t *) inbufend;
+
+ /* Test whether there is enough input. */
+ if (winbuf + 1 > winbufend)
+ return (winbuf == winbufend
+ ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
+
+ /* The array starting at FROM_IDX contains indices to the string table
+ in FROM_TBL. The indices are sorted with respect to the strings.
+ I.e., we are doing binary search. */
+ low = 0;
+ high = size;
+ while (low < high)
+ {
+ uint_fast32_t med = (low + high) / 2;
+ uint32_t idx;
+ int cnt;
+
+ /* Compare the string at this index with the string at the current
+ position in the input buffer. */
+ idx = from_idx[med];
+ cnt = 0;
+ do
+ {
+ if (from_tbl[idx + cnt] != winbuf[cnt])
+ /* Does not match. */
+ break;
+ ++cnt;
+ }
+ while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
+
+ if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
+ {
+ /* Found a matching input sequence. Now try to convert the
+ possible replacements. */
+ uint32_t idx2 = to_idx[med];
+
+ do
+ {
+ /* Determine length of replacement. */
+ uint_fast32_t len = 0;
+ int res;
+ const unsigned char *toinptr;
+
+ while (to_tbl[idx2 + len] != L'\0')
+ ++len;
+
+ /* Try this input text. */
+ toinptr = (const unsigned char *) &to_tbl[idx2];
+ res = DL_CALL_FCT (step->__fct,
+ (step, step_data, &toinptr,
+ (const unsigned char *) &to_tbl[idx2 + len],
+ (unsigned char **) outbufstart,
+ irreversible, 0, 0));
+ if (res != __GCONV_ILLEGAL_INPUT)
+ {
+ /* If the conversion succeeds we have to increment the
+ input buffer. */
+ if (res == __GCONV_EMPTY_INPUT)
+ {
+ *inbufp += cnt * sizeof (uint32_t);
+ ++*irreversible;
+ }
+
+ return res;
+ }
+
+ /* Next replacement. */
+ idx2 += len + 1;
+ }
+ while (to_tbl[idx2] != L'\0');
+
+ /* Nothing found, continue searching. */
+ }
+
+ /* LOW and HIGH are positions in FROM_IDX, so the range must be
+ narrowed with MED. IDX is an offset into FROM_TBL and using it
+ here could loop forever or index FROM_IDX out of bounds. */
+ if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
+ low = med + 1;
+ else
+ high = med;
+ }
+
+ /* Haven't found a match. */
return __GCONV_ILLEGAL_INPUT;
}
diff --git a/iconv/skeleton.c b/iconv/skeleton.c
index d64d7599b4..8dbebb81ac 100644
--- a/iconv/skeleton.c
+++ b/iconv/skeleton.c
@@ -271,7 +271,7 @@ gconv_init (struct __gconv_step *step)
int
FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
const unsigned char **inptrp, const unsigned char *inend,
- unsigned char *outbufstart, size_t *irreversible, int do_flush,
+ unsigned char **outbufstart, size_t *irreversible, int do_flush,
int consume_incomplete)
{
struct __gconv_step *next_step = step + 1;
@@ -288,6 +288,9 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
{
status = __GCONV_OK;
+ /* This should never happen during error handling. */
+ assert (outbufstart == NULL);
+
#ifdef EMIT_SHIFT_TO_INIT
/* Emit the escape sequence to reset the state. */
EMIT_SHIFT_TO_INIT;
@@ -296,14 +299,15 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
successfully emitted the escape sequence. */
if (status == __GCONV_OK && ! (data->__flags & __GCONV_IS_LAST))
status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
- next_data->__outbuf, irreversible, 1,
+ NULL, irreversible, 1,
consume_incomplete));
}
else
{
/* We preserve the initial values of the pointer variables. */
const unsigned char *inptr = *inptrp;
- unsigned char *outbuf = outbufstart;
+ unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1)
+ ? data->__outbuf : *outbufstart);
unsigned char *outend = data->__outbufend;
unsigned char *outstart;
/* This variable is used to count the number of characters we
@@ -331,7 +335,10 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
|| (MAX_NEEDED_TO > 1 && !FROM_DIRECTION))
&& consume_incomplete && (data->__statep->__count & 7) != 0)
{
- /* Yep, we have some bytes left over. Process them now. */
+ /* Yep, we have some bytes left over. Process them now.
+ But this must not happen while we are called from an
+ error handler. */
+ assert (outbufstart == NULL);
# if MAX_NEEDED_FROM > 1
if (MAX_NEEDED_TO == 1 || FROM_DIRECTION)
@@ -412,6 +419,14 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
}
#endif
+ /* If we were called as part of an error handling module we
+ don't do anything else here. */
+ if (__builtin_expect (outbufstart != NULL, 0))
+ {
+ *outbufstart = outbuf;
+ return status;
+ }
+
/* Give the transliteration module the chance to store the
original text and the result in case it needs a context. */
if (data->__trans.__trans_context_fct != NULL)
@@ -443,8 +458,7 @@ FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
int result;
result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
- outbuf, next_data->__outbuf,
- irreversible, 0,
+ outbuf, NULL, irreversible, 0,
consume_incomplete));
if (result != __GCONV_EMPTY_INPUT)