aboutsummaryrefslogtreecommitdiff
path: root/REORG.TODO/iconv/loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/iconv/loop.c')
-rw-r--r--REORG.TODO/iconv/loop.c523
1 files changed, 523 insertions, 0 deletions
diff --git a/REORG.TODO/iconv/loop.c b/REORG.TODO/iconv/loop.c
new file mode 100644
index 0000000000..0160f72cd6
--- /dev/null
+++ b/REORG.TODO/iconv/loop.c
@@ -0,0 +1,523 @@
+/* Conversion loop frame work.
+ Copyright (C) 1998-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file provides a frame for the reader loop in all conversion modules.
+ The actual code must (of course) be provided in the actual module source
+ code but certain actions can be written down generically, with some
+ customization options which are these:
+
+ MIN_NEEDED_INPUT minimal number of input bytes needed for the next
+ conversion.
+ MIN_NEEDED_OUTPUT minimal number of bytes produced by the next round
+ of conversion.
+
+ MAX_NEEDED_INPUT you guess it, this is the maximal number of input
+ bytes needed. It defaults to MIN_NEEDED_INPUT
+ MAX_NEEDED_OUTPUT likewise for output bytes.
+
+ LOOPFCT name of the function created. If not specified
+ the name is `loop' but this prevents the use
+ of multiple functions in the same file.
+
+ BODY this is supposed to expand to the body of the loop.
+ The user must provide this.
+
+ EXTRA_LOOP_DECLS extra arguments passed from conversion loop call.
+
+ INIT_PARAMS code to define and initialize variables from params.
+ UPDATE_PARAMS code to store result in params.
+
+ ONEBYTE_BODY body of the specialized conversion function for a
+ single byte from the current character set to INTERNAL.
+*/
+
+#include <assert.h>
+#include <endian.h>
+#include <gconv.h>
+#include <stdint.h>
+#include <string.h>
+#include <wchar.h>
+#include <sys/param.h> /* For MIN. */
+#define __need_size_t
+#include <stddef.h>
+#include <libc-diag.h>
+
+/* We have to provide support for machines which are not able to handled
+ unaligned memory accesses. Some of the character encodings have
+ representations with a fixed width of 2 or 4 bytes. But if we cannot
+ access unaligned memory we still have to read byte-wise. */
+#undef FCTNAME2
+#if _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
+/* We can handle unaligned memory access. */
+# define get16(addr) *((const uint16_t *) (addr))
+# define get32(addr) *((const uint32_t *) (addr))
+
+/* We need no special support for writing values either. */
+# define put16(addr, val) *((uint16_t *) (addr)) = (val)
+# define put32(addr, val) *((uint32_t *) (addr)) = (val)
+
+# define FCTNAME2(name) name
+#else
+/* Distinguish between big endian and little endian. */
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+# define get16(addr) \
+ (((const unsigned char *) (addr))[1] << 8 \
+ | ((const unsigned char *) (addr))[0])
+# define get32(addr) \
+ (((((const unsigned char *) (addr))[3] << 8 \
+ | ((const unsigned char *) (addr))[2]) << 8 \
+ | ((const unsigned char *) (addr))[1]) << 8 \
+ | ((const unsigned char *) (addr))[0])
+
+# define put16(addr, val) \
+ ({ uint16_t __val = (val); \
+ ((unsigned char *) (addr))[0] = __val; \
+ ((unsigned char *) (addr))[1] = __val >> 8; \
+ (void) 0; })
+# define put32(addr, val) \
+ ({ uint32_t __val = (val); \
+ ((unsigned char *) (addr))[0] = __val; \
+ __val >>= 8; \
+ ((unsigned char *) (addr))[1] = __val; \
+ __val >>= 8; \
+ ((unsigned char *) (addr))[2] = __val; \
+ __val >>= 8; \
+ ((unsigned char *) (addr))[3] = __val; \
+ (void) 0; })
+# else
+# define get16(addr) \
+ (((const unsigned char *) (addr))[0] << 8 \
+ | ((const unsigned char *) (addr))[1])
+# define get32(addr) \
+ (((((const unsigned char *) (addr))[0] << 8 \
+ | ((const unsigned char *) (addr))[1]) << 8 \
+ | ((const unsigned char *) (addr))[2]) << 8 \
+ | ((const unsigned char *) (addr))[3])
+
+# define put16(addr, val) \
+ ({ uint16_t __val = (val); \
+ ((unsigned char *) (addr))[1] = __val; \
+ ((unsigned char *) (addr))[0] = __val >> 8; \
+ (void) 0; })
+# define put32(addr, val) \
+ ({ uint32_t __val = (val); \
+ ((unsigned char *) (addr))[3] = __val; \
+ __val >>= 8; \
+ ((unsigned char *) (addr))[2] = __val; \
+ __val >>= 8; \
+ ((unsigned char *) (addr))[1] = __val; \
+ __val >>= 8; \
+ ((unsigned char *) (addr))[0] = __val; \
+ (void) 0; })
+# endif
+
+# define FCTNAME2(name) name##_unaligned
+#endif
+#define FCTNAME(name) FCTNAME2(name)
+
+
+/* We need at least one byte for the next round. */
+#ifndef MIN_NEEDED_INPUT
+# error "MIN_NEEDED_INPUT definition missing"
+#elif MIN_NEEDED_INPUT < 1
+# error "MIN_NEEDED_INPUT must be >= 1"
+#endif
+
+/* Let's see how many bytes we produce. */
+#ifndef MAX_NEEDED_INPUT
+# define MAX_NEEDED_INPUT MIN_NEEDED_INPUT
+#endif
+
+/* We produce at least one byte in the next round. */
+#ifndef MIN_NEEDED_OUTPUT
+# error "MIN_NEEDED_OUTPUT definition missing"
+#elif MIN_NEEDED_OUTPUT < 1
+# error "MIN_NEEDED_OUTPUT must be >= 1"
+#endif
+
+/* Let's see how many bytes we produce. */
+#ifndef MAX_NEEDED_OUTPUT
+# define MAX_NEEDED_OUTPUT MIN_NEEDED_OUTPUT
+#endif
+
+/* Default name for the function. */
+#ifndef LOOPFCT
+# define LOOPFCT loop
+#endif
+
+/* Make sure we have a loop body. */
+#ifndef BODY
+# error "Definition of BODY missing for function" LOOPFCT
+#endif
+
+
+/* If no arguments have to passed to the loop function define the macro
+ as empty. */
+#ifndef EXTRA_LOOP_DECLS
+# define EXTRA_LOOP_DECLS
+#endif
+
+/* Allow using UPDATE_PARAMS in macros where #ifdef UPDATE_PARAMS test
+ isn't possible. */
+#ifndef UPDATE_PARAMS
+# define UPDATE_PARAMS do { } while (0)
+#endif
+#ifndef REINIT_PARAMS
+# define REINIT_PARAMS do { } while (0)
+#endif
+
+
+/* To make it easier for the writers of the modules, we define a macro
+ to test whether we have to ignore errors. */
+#define ignore_errors_p() \
+ (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
+
+
+/* Error handling for the FROM_LOOP direction, with ignoring of errors.
+ Note that we cannot use the do while (0) trick since `break' and
+ `continue' must reach certain points. */
+#define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ \
+ if (! ignore_errors_p ()) \
+ break; \
+ \
+ /* We ignore the invalid input byte sequence. */ \
+ inptr += (Incr); \
+ ++*irreversible; \
+ /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+ that "iconv -c" must give the same exitcode as "iconv". */ \
+ continue; \
+ }
+
+/* Error handling for the TO_LOOP direction, with use of transliteration/
+ transcription functions and ignoring of errors. Note that we cannot use
+ the do while (0) trick since `break' and `continue' must reach certain
+ points. */
+#define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
+ { \
+ result = __GCONV_ILLEGAL_INPUT; \
+ \
+ if (irreversible == NULL) \
+ /* This means we are in call from __gconv_transliterate. In this \
+ case we are not doing any error recovery outself. */ \
+ break; \
+ \
+ /* If needed, flush any conversion state, so that __gconv_transliterate \
+ starts with current shift state. */ \
+ UPDATE_PARAMS; \
+ \
+ /* First try the transliteration methods. */ \
+ if ((step_data->__flags & __GCONV_TRANSLIT) != 0) \
+ result = __gconv_transliterate \
+ (step, step_data, *inptrp, \
+ &inptr, inend, &outptr, irreversible); \
+ \
+ REINIT_PARAMS; \
+ \
+ /* If any of them recognized the input continue with the loop. */ \
+ if (result != __GCONV_ILLEGAL_INPUT) \
+ { \
+ if (__glibc_unlikely (result == __GCONV_FULL_OUTPUT)) \
+ break; \
+ \
+ continue; \
+ } \
+ \
+ /* Next see whether we have to ignore the error. If not, stop. */ \
+ if (! ignore_errors_p ()) \
+ break; \
+ \
+ /* When we come here it means we ignore the character. */ \
+ ++*irreversible; \
+ inptr += Incr; \
+ /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
+ that "iconv -c" must give the same exitcode as "iconv". */ \
+ continue; \
+ }
+
+
+/* Handling of Unicode 3.1 TAG characters. Unicode recommends
+ "If language codes are not relevant to the particular processing
+ operation, then they should be ignored." This macro is usually
+ called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr). */
+#define UNICODE_TAG_HANDLER(Character, Incr) \
+ { \
+ /* TAG characters are those in the range U+E0000..U+E007F. */ \
+ if (((Character) >> 7) == (0xe0000 >> 7)) \
+ { \
+ inptr += Incr; \
+ continue; \
+ } \
+ }
+
+
+/* The function returns the status, as defined in gconv.h. */
+static inline int
+__attribute ((always_inline))
+FCTNAME (LOOPFCT) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
+ unsigned char **outptrp, const unsigned char *outend,
+ size_t *irreversible EXTRA_LOOP_DECLS)
+{
+#ifdef LOOP_NEED_STATE
+ mbstate_t *state = step_data->__statep;
+#endif
+#ifdef LOOP_NEED_FLAGS
+ int flags = step_data->__flags;
+#endif
+#ifdef LOOP_NEED_DATA
+ void *data = step->__data;
+#endif
+ int result = __GCONV_EMPTY_INPUT;
+ const unsigned char *inptr = *inptrp;
+ unsigned char *outptr = *outptrp;
+
+#ifdef INIT_PARAMS
+ INIT_PARAMS;
+#endif
+
+ while (inptr != inend)
+ {
+ /* `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 is made to help the
+ compiler generating better code. They will be optimized away
+ since MIN_NEEDED_OUTPUT is always a constant. */
+ if (MIN_NEEDED_INPUT > 1
+ && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
+ {
+ /* We don't have enough input for another complete input
+ character. */
+ result = __GCONV_INCOMPLETE_INPUT;
+ break;
+ }
+ if ((MIN_NEEDED_OUTPUT != 1
+ && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
+ || (MIN_NEEDED_OUTPUT == 1
+ && __builtin_expect (outptr >= outend, 0)))
+ {
+ /* Overflow in the output buffer. */
+ result = __GCONV_FULL_OUTPUT;
+ break;
+ }
+
+ /* Here comes the body the user provides. It can stop with
+ RESULT set to GCONV_INCOMPLETE_INPUT (if the size of the
+ input characters vary in size), GCONV_ILLEGAL_INPUT, or
+ GCONV_FULL_OUTPUT (if the output characters vary in size). */
+ BODY
+ }
+
+ /* Update the pointers pointed to by the parameters. */
+ *inptrp = inptr;
+ *outptrp = outptr;
+ UPDATE_PARAMS;
+
+ return result;
+}
+
+
+/* Include the file a second time to define the function to handle
+ unaligned access. */
+#if !defined DEFINE_UNALIGNED && !_STRING_ARCH_unaligned \
+ && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
+ && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
+# undef get16
+# undef get32
+# undef put16
+# undef put32
+# undef unaligned
+
+# define DEFINE_UNALIGNED
+# include "loop.c"
+# undef DEFINE_UNALIGNED
+#else
+# if MAX_NEEDED_INPUT > 1
+# define SINGLE(fct) SINGLE2 (fct)
+# define SINGLE2(fct) fct##_single
+static inline int
+__attribute ((always_inline))
+SINGLE(LOOPFCT) (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char **inptrp, const unsigned char *inend,
+ unsigned char **outptrp, unsigned char *outend,
+ size_t *irreversible EXTRA_LOOP_DECLS)
+{
+ mbstate_t *state = step_data->__statep;
+# ifdef LOOP_NEED_FLAGS
+ int flags = step_data->__flags;
+# endif
+# ifdef LOOP_NEED_DATA
+ void *data = step->__data;
+# endif
+ int result = __GCONV_OK;
+ unsigned char bytebuf[MAX_NEEDED_INPUT];
+ const unsigned char *inptr = *inptrp;
+ unsigned char *outptr = *outptrp;
+ size_t inlen;
+
+# ifdef INIT_PARAMS
+ INIT_PARAMS;
+# endif
+
+# ifdef UNPACK_BYTES
+ UNPACK_BYTES
+# else
+ /* Add the bytes from the state to the input buffer. */
+ assert ((state->__count & 7) <= sizeof (state->__value));
+ for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
+ bytebuf[inlen] = state->__value.__wchb[inlen];
+# endif
+
+ /* Are there enough bytes in the input buffer? */
+ if (MIN_NEEDED_INPUT > 1
+ && __builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
+ {
+ *inptrp = inend;
+# ifdef STORE_REST
+
+ /* Building with -O3 GCC emits a `array subscript is above array
+ bounds' warning. GCC BZ #64739 has been opened for this. */
+ DIAG_PUSH_NEEDS_COMMENT;
+ DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Warray-bounds");
+ while (inptr < inend)
+ bytebuf[inlen++] = *inptr++;
+ DIAG_POP_NEEDS_COMMENT;
+
+ inptr = bytebuf;
+ inptrp = &inptr;
+ inend = &bytebuf[inlen];
+
+ STORE_REST
+# else
+ /* We don't have enough input for another complete input
+ character. */
+ while (inptr < inend)
+ state->__value.__wchb[inlen++] = *inptr++;
+# endif
+
+ return __GCONV_INCOMPLETE_INPUT;
+ }
+
+ /* Enough space in output buffer. */
+ if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
+ || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
+ /* Overflow in the output buffer. */
+ return __GCONV_FULL_OUTPUT;
+
+ /* Now add characters from the normal input buffer. */
+ do
+ bytebuf[inlen++] = *inptr++;
+ while (inlen < MAX_NEEDED_INPUT && inptr < inend);
+
+ inptr = bytebuf;
+ inend = &bytebuf[inlen];
+
+ do
+ {
+ BODY
+ }
+ while (0);
+
+ /* Now we either have produced an output character and consumed all the
+ bytes from the state and at least one more, or the character is still
+ incomplete, or we have some other error (like illegal input character,
+ no space in output buffer). */
+ if (__glibc_likely (inptr != bytebuf))
+ {
+ /* We found a new character. */
+ assert (inptr - bytebuf > (state->__count & 7));
+
+ *inptrp += inptr - bytebuf - (state->__count & 7);
+ *outptrp = outptr;
+
+ result = __GCONV_OK;
+
+ /* Clear the state buffer. */
+# ifdef CLEAR_STATE
+ CLEAR_STATE;
+# else
+ state->__count &= ~7;
+# endif
+ }
+ else if (result == __GCONV_INCOMPLETE_INPUT)
+ {
+ /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
+ available. */
+ assert (inend != &bytebuf[MAX_NEEDED_INPUT]);
+
+ *inptrp += inend - bytebuf - (state->__count & 7);
+# ifdef STORE_REST
+ inptrp = &inptr;
+
+ STORE_REST
+# else
+ /* We don't have enough input for another complete input
+ character. */
+ assert (inend - inptr > (state->__count & ~7));
+ assert (inend - inptr <= sizeof (state->__value));
+ state->__count = (state->__count & ~7) | (inend - inptr);
+ inlen = 0;
+ while (inptr < inend)
+ state->__value.__wchb[inlen++] = *inptr++;
+# endif
+ }
+
+ return result;
+}
+# undef SINGLE
+# undef SINGLE2
+# endif
+
+
+# ifdef ONEBYTE_BODY
+/* Define the shortcut function for btowc. */
+static wint_t
+gconv_btowc (struct __gconv_step *step, unsigned char c)
+ ONEBYTE_BODY
+# define FROM_ONEBYTE gconv_btowc
+# endif
+
+#endif
+
+/* We remove the macro definitions so that we can include this file again
+ for the definition of another function. */
+#undef MIN_NEEDED_INPUT
+#undef MAX_NEEDED_INPUT
+#undef MIN_NEEDED_OUTPUT
+#undef MAX_NEEDED_OUTPUT
+#undef LOOPFCT
+#undef BODY
+#undef LOOPFCT
+#undef EXTRA_LOOP_DECLS
+#undef INIT_PARAMS
+#undef UPDATE_PARAMS
+#undef REINIT_PARAMS
+#undef ONEBYTE_BODY
+#undef UNPACK_BYTES
+#undef CLEAR_STATE
+#undef LOOP_NEED_STATE
+#undef LOOP_NEED_FLAGS
+#undef LOOP_NEED_DATA
+#undef get16
+#undef get32
+#undef put16
+#undef put32
+#undef unaligned