diff options
-rw-r--r-- | ChangeLog | 13 | ||||
-rw-r--r-- | iconv/loop.c | 12 | ||||
-rw-r--r-- | iconv/skeleton.c | 27 | ||||
-rw-r--r-- | iconvdata/Makefile | 2 | ||||
-rw-r--r-- | iconvdata/TESTS | 2 | ||||
-rw-r--r-- | iconvdata/iso646.c | 332 | ||||
-rw-r--r-- | iconvdata/testdata/ANSI_X3.4-1968 | 6 | ||||
-rw-r--r-- | iconvdata/testdata/BS_4730 | 6 | ||||
-rw-r--r-- | iconvdata/testdata/BS_4730..UTF8 | 6 |
9 files changed, 204 insertions, 202 deletions
@@ -1,3 +1,16 @@ +1998-04-21 20:32 Ulrich Drepper <drepper@cygnus.com> + + * iconv/loop.c: Allow EXTRA_LOOP_DECLS to be defined and use it in + function declaration. + * iconv/skeleton.c: Allow PREPARE_LOOP and EXTRA_LOOP_ARGS to be defined + and use them in conversion function. + * iconvdata/Makefile (modules): Re-add ISO646. + * iconvdata/TESTS: Add tests for ASCII variants. + * iconvdata/iso646.c: Rewrite to use loop.c and skeleton.c. + * iconvdata/testdata/ANSI_X3.4-1968: New file. + * iconvdata/testdata/BS_4730: New file. + * iconvdata/testdata/BS_4730..UTF8: New file. + 1998-04-21 19:05 Ulrich Drepper <drepper@cygnus.com> * iconv/gconv.h (struct gconv_step): Add element stateful. diff --git a/iconv/loop.c b/iconv/loop.c index e1c1ab6e49..f8188873dc 100644 --- a/iconv/loop.c +++ b/iconv/loop.c @@ -43,6 +43,8 @@ BODY this is supposed to expand to the body of the loop. The user must provide this. + + EXTRA_LOOP_DECLS Extra arguments passed from conversion loop call. */ #include <gconv.h> @@ -106,11 +108,18 @@ #endif +/* If no arguments have to be passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_DECLS +# define EXTRA_LOOP_DECLS +#endif + + /* The function returns the status, as defined in gconv.h. */ static inline int LOOPFCT (const unsigned char **inptrp, const unsigned char *inend, unsigned char **outptrp, unsigned char *outend, mbstate_t *state, - void *data, size_t *converted) + void *data, size_t *converted EXTRA_LOOP_DECLS) { int result = GCONV_OK; const unsigned char *inptr = *inptrp; @@ -222,3 +231,4 @@ LOOPFCT (const unsigned char **inptrp, const unsigned char *inend, #undef COUNT_CONVERTED #undef BODY #undef LOOPFCT +#undef EXTRA_LOOP_DECLS diff --git a/iconv/skeleton.c b/iconv/skeleton.c index 36cc33c9be..8421941a1e 100644 --- a/iconv/skeleton.c +++ b/iconv/skeleton.c @@ -67,6 +67,12 @@ to cover only those characters up to the error. FUNCTION_NAME if not set the conversion function is named `gconv'. 
+ + PREPARE_LOOP optional code preparing the conversion loop. Can + contain variable definitions. + + EXTRA_LOOP_ARGS optional macro specifying extra arguments passed + to loop function. */ #include <assert.h> @@ -161,6 +167,13 @@ gconv_init (struct gconv_step *step) #endif +/* If no arguments have to be passed to the loop function define the macro + as empty. */ +#ifndef EXTRA_LOOP_ARGS +# define EXTRA_LOOP_ARGS +#endif + + /* This is the actual conversion function. */ #ifndef FUNCTION_NAME # define FUNCTION_NAME gconv @@ -210,6 +223,10 @@ FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, char *outend = data->outbufend; char *outptr; +#ifdef PREPARE_LOOP + PREPARE_LOOP +#endif + do { /* Remember the start value for this round. */ @@ -229,14 +246,16 @@ FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, (const unsigned char *) inbufend, (unsigned char **) &outbuf, (unsigned char *) outend, - data->statep, step->data, &converted); + data->statep, step->data, &converted + EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ status = TO_LOOP ((const unsigned char **) inbuf, (const unsigned char *) inbufend, (unsigned char **) &outbuf, (unsigned char *) outend, - data->statep, step->data, &converted); + data->statep, step->data, &converted + EXTRA_LOOP_ARGS); /* If this is the last step leave the loop, there is nothing we can do. */ @@ -284,7 +303,7 @@ FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, (unsigned char **) &outbuf, (unsigned char *) outerr, data->statep, step->data, - &converted); + &converted EXTRA_LOOP_ARGS); else /* Run the conversion loop. */ nstatus = TO_LOOP ((const unsigned char **) inbuf, @@ -292,7 +311,7 @@ FUNCTION_NAME (struct gconv_step *step, struct gconv_step_data *data, (unsigned char **) &outbuf, (unsigned char *) outerr, data->statep, step->data, - &converted); + &converted EXTRA_LOOP_ARGS); /* We must run out of output buffer space in this rerun. 
*/ diff --git a/iconvdata/Makefile b/iconvdata/Makefile index c590ab20b4..d8fda78330 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -27,7 +27,7 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \ T.61 ISO_6937 SJIS KOI-8 KOI8-R LATIN-GREEK LATIN-GREEK-1 \ HP-ROMAN8 EBCDIC-AT-DE EBCDIC-AT-DE-A EBCDIC-CA-FR \ EUC-KR UHC JOHAB libJIS libKSC BIG5 EUC-JP libGB \ - EUC-CN libCNS EUC-TW # ISO646 + EUC-CN libCNS EUC-TW ISO646 modules.so := $(addsuffix .so, $(modules)) diff --git a/iconvdata/TESTS b/iconvdata/TESTS index c37381e70d..b4c95057c8 100644 --- a/iconvdata/TESTS +++ b/iconvdata/TESTS @@ -39,3 +39,5 @@ ISO-8859-7 ISO-8859-7 Y UTF8 ISO-8859-8 ISO-8859-8 Y UTF8 ISO-8859-9 ISO-8859-9 Y UTF8 ISO-8859-10 ISO-8859-10 Y UCS2 UTF8 +ANSI_X3.4-1968 ANSI_X3.4-1968 Y UTF8 +BS_4730 BS_4730 Y UTF8 diff --git a/iconvdata/iso646.c b/iconvdata/iso646.c index 3c40c8fd0c..4b7c2bbbbf 100644 --- a/iconvdata/iso646.c +++ b/iconvdata/iso646.c @@ -32,9 +32,23 @@ proofs to be necessary. */ #include <gconv.h> -#include <stdlib.h> +#include <stdint.h> #include <string.h> +/* Definitions used in the body of the `gconv' function. */ +#define FROM_LOOP from_ascii +#define TO_LOOP to_ascii +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 4 +#define FROM_DIRECTION dir == from_iso646 +#define PREPARE_LOOP \ + enum direction dir = ((struct iso646_data *) step->data)->dir; \ + enum variant var = ((struct iso646_data *) step->data)->var; +#define EXTRA_LOOP_ARGS , var + + /* Direction of the transformation. 
*/ enum direction { @@ -66,22 +80,22 @@ gconv_init (struct gconv_step *step) enum variant var; int result; - if (strcasestr (step->from_name, "ANSI_X3.4-1968") != NULL) + if (__strcasestr (step->from_name, "ANSI_X3.4-1968") != NULL) { dir = from_iso646; var = US; } - else if (strcasestr (step->from_name, "BS_4730") != NULL) + else if (__strcasestr (step->from_name, "BS_4730") != NULL) { dir = from_iso646; var = GB; } - else if (strcasestr (step->to_name, "ANSI_X3.4-1968") != NULL) + else if (__strcasestr (step->to_name, "ANSI_X3.4-1968") != NULL) { dir = to_iso646; var = US; } - else if (strcasestr (step->to_name, "BS_4730") != NULL) + else if (__strcasestr (step->to_name, "BS_4730") != NULL) { dir = to_iso646; var = GB; @@ -104,6 +118,13 @@ gconv_init (struct gconv_step *step) result = GCONV_OK; } + step->min_needed_from = MIN_NEEDED_FROM; + step->max_needed_from = MIN_NEEDED_FROM; + step->min_needed_to = MIN_NEEDED_TO; + step->max_needed_to = MIN_NEEDED_TO; + + step->stateful = 0; + return result; } @@ -115,194 +136,113 @@ gconv_end (struct gconv_step *data) } -int -gconv (struct gconv_step *step, struct gconv_step_data *data, - const char *inbuf, size_t *inbufsize, size_t *written, int do_flush) -{ - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - gconv_fct fct = next_step->fct; - size_t do_write; - int result; - - /* If the function is called with no input this means we have to reset - to the initial state. The possibly partly converted input is - dropped. */ - if (do_flush) - { - do_write = 0; - - /* Call the steps down the chain if there are any. */ - if (data->is_last) - result = GCONV_OK; - else - { - struct gconv_step *next_step = step + 1; - struct gconv_step_data *next_data = data + 1; - - result = (*fct) (next_step, next_data, NULL, 0, written, 1); - - /* Clear output buffer. 
*/ - data->outbufavail = 0; - } - } - else - { - enum direction dir = ((struct iso646_data *) step->data)->dir; - enum variant var = ((struct iso646_data *) step->data)->var; - - do_write = 0; - - do - { - result = GCONV_OK; - - if (dir == from_iso646) - { - size_t inchars = *inbufsize; - size_t outwchars = data->outbufavail; - char *outbuf = data->outbuf; - size_t cnt = 0; - - while (cnt < inchars - && (outwchars + sizeof (wchar_t) <= data->outbufsize)) - { - switch ((unsigned char) inbuf[cnt]) - { - case '\x23': - if (var == GB) - *((wchar_t *) (outbuf + outwchars)) = 0xa3; - else - *((wchar_t *) (outbuf + outwchars)) = 0x23; - break; - case '\x75': - if (var == GB) - *((wchar_t *) (outbuf + outwchars)) = 0x203e; - else - *((wchar_t *) (outbuf + outwchars)) = 0x75; - break; - default: - *((wchar_t *) (outbuf + outwchars)) = - (unsigned char) inbuf[cnt]; - case '\x80' ... '\xff': - /* Illegal character. */ - result = GCONV_ILLEGAL_INPUT; - goto out_from; - } - ++do_write; - outwchars += sizeof (wchar_t); - ++cnt; - } - out_from: - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = outwchars; - } - else - { - size_t inwchars = *inbufsize; - size_t outchars = data->outbufavail; - unsigned char *outbuf = data->outbuf; - size_t cnt = 0; - - while (inwchars >= cnt + sizeof (wchar_t) - && outchars < data->outbufsize) - { - switch (*((wchar_t *) (inbuf + cnt))) - { - case 0x23: - if (var == GB) - goto out_to; - outbuf[outchars] = 0x23; - break; - case 0x75: - if (var == GB) - goto out_to; - outbuf[outchars] = 0x75; - break; - case 0xa3: - if (var != GB) - goto out_to; - outbuf[outchars] = 0x23; - break; - case 0x203e: - if (var != GB) - goto out_to; - outbuf[outchars] = 0x75; - break; - default: - if (*((wchar_t *) (inbuf + cnt)) > 0x7f) - goto out_to; - outbuf[outchars] = - (unsigned char) *((wchar_t *) (inbuf + cnt)); - break; - } - - ++do_write; - ++outchars; - cnt += sizeof (wchar_t); - } - out_to: - *inbufsize -= cnt; - inbuf += cnt; - data->outbufavail = 
outchars; - - if (outchars < data->outbufsize) - { - /* If there is still room in the output buffer something - is wrong with the input. */ - if (inwchars >= cnt + sizeof (wchar_t)) - { - /* An error occurred. */ - result = GCONV_ILLEGAL_INPUT; - break; - } - if (inwchars != cnt) - { - /* There are some unprocessed bytes at the end of the - input buffer. */ - result = GCONV_INCOMPLETE_INPUT; - break; - } - } - } - - if (result != GCONV_OK) - break; - - if (data->is_last) - { - /* This is the last step. */ - result = (*inbufsize > (dir == from_iso646 - ? 0 : sizeof (wchar_t) - 1) - ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT); - break; - } - - /* Status so far. */ - result = GCONV_EMPTY_INPUT; - - if (data->outbufavail > 0) - { - /* Call the functions below in the chain. */ - size_t newavail = data->outbufavail; - - result = (*fct) (next_step, next_data, data->outbuf, &newavail, - written, 0); - - /* Correct the output buffer. */ - if (newavail != data->outbufavail && newavail > 0) - memmove (data->outbuf, - &data->outbuf[data->outbufavail - newavail], - newavail); - data->outbufavail = newavail; - } - } - while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT); - } - - if (written != NULL && data->is_last) - *written += do_write; - - return result; -} +/* First define the conversion function from ASCII to UCS4. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + uint32_t ch; \ + int failure = GCONV_OK; \ + \ + switch (*inptr) \ + { \ + case '\x23': \ + if (var == GB) \ + ch = 0xa3; \ + else \ + ch = 0x23; \ + break; \ + case '\x7e': \ + if (var == GB) \ + ch = 0x203e; \ + else \ + ch = 0x7e; \ + break; \ + default: \ + ch = *inptr; \ + break; \ + case '\x80' ... '\xff': \ + /* Illegal character. */ \ + failure = GCONV_ILLEGAL_INPUT; \ + ch = '\0'; /* OK, gcc, here I initialize the variable. 
*/ \ + break; \ + } \ + \ + /* Hopefully gcc can recognize that the following `if' is only true \ + when we reach the default case in the `switch' statement. */ \ + if (failure == GCONV_ILLEGAL_INPUT) \ + { \ + /* Exit the loop with an error. */ \ + result = failure; \ + break; \ + } \ + *((uint32_t *) outptr)++ = ch; \ + ++inptr; \ + } +#define EXTRA_LOOP_DECLS , enum variant var +#include <iconv/loop.c> + + +/* Next, define the other direction. */ +#define MIN_NEEDED_INPUT MIN_NEEDED_TO +#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +#define LOOPFCT TO_LOOP +#define BODY \ + { \ + unsigned char ch; \ + int failure = GCONV_OK; \ + \ + do \ + { \ + switch (*((uint32_t *) inptr)) \ + { \ + case 0x23: \ + if (var == GB) \ + break; \ + ch = 0x23; \ + continue; \ + case 0x7e: \ + if (var == GB) \ + break; \ + ch = 0x7e; \ + continue; \ + case 0xa3: \ + if (var != GB) \ + break; \ + ch = 0x23; \ + continue; \ + case 0x203e: \ + if (var != GB) \ + break; \ + ch = 0x7e; \ + continue; \ + default: \ + if (*((uint32_t *) inptr) > 0x7f) \ + break; \ + ch = (unsigned char) *((uint32_t *) inptr); \ + continue; \ + } \ + /* When we come to this place we saw an illegal character. */ \ + failure = GCONV_ILLEGAL_INPUT; \ + ch = '\0'; /* OK, gcc, here I initialize the variable. */ \ + } \ + while (0); \ + \ + /* Hopefully gcc can recognize that the following `if' is only true \ + when we fall through the `switch' statement. */ \ + if (failure == GCONV_ILLEGAL_INPUT) \ + { \ + /* Exit the loop with an error. */ \ + result = failure; \ + break; \ + } \ + *outptr++ = ch; \ + inptr += 4; \ + } +#define EXTRA_LOOP_DECLS , enum variant var +#include <iconv/loop.c> + + +/* Now define the toplevel functions. */ +#include <iconv/skeleton.c> diff --git a/iconvdata/testdata/ANSI_X3.4-1968 b/iconvdata/testdata/ANSI_X3.4-1968 new file mode 100644 index 0000000000..7b7da5f318 --- /dev/null +++ b/iconvdata/testdata/ANSI_X3.4-1968 @@ -0,0 +1,6 @@ + ! " # $ % & ' ( ) * + , - . 
/ + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ diff --git a/iconvdata/testdata/BS_4730 b/iconvdata/testdata/BS_4730 new file mode 100644 index 0000000000..7b7da5f318 --- /dev/null +++ b/iconvdata/testdata/BS_4730 @@ -0,0 +1,6 @@ + ! " # $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ~ diff --git a/iconvdata/testdata/BS_4730..UTF8 b/iconvdata/testdata/BS_4730..UTF8 new file mode 100644 index 0000000000..0dc3ff3117 --- /dev/null +++ b/iconvdata/testdata/BS_4730..UTF8 @@ -0,0 +1,6 @@ + ! " £ $ % & ' ( ) * + , - . / + 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + @ A B C D E F G H I J K L M N O + P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o + p q r s t u v w x y z { | } ‾ |